Example #1
def board_to_fen(board_labels: tf.Tensor) -> str:
    board_labels_arr = board_labels.numpy()
    row_strings = []
    for row in board_labels_arr:
        row_string = ""
        empty_squares = 0
        for label in row:
            # TensorFlow uses byte strings so convert to unicode
            unicode_label = str(label, "utf-8")
            if unicode_label == "_":
                empty_squares += 1
            else:
                row_string += str(empty_squares) if empty_squares else ""
                row_string += unicode_label
                empty_squares = 0

        row_string += str(empty_squares) if empty_squares else ""
        row_strings.append(row_string)

    return "/".join(row_strings)
Example #2
    def preprocess_standard_light_curve(
            self,
            load_times_fluxes_and_flux_errors_from_path_function: Callable[
                [Path], Tuple[np.ndarray, np.ndarray, Union[np.ndarray, None]]],
            load_auxiliary_information_for_path_function: Callable[[Path], np.ndarray],
            load_label_from_path_function: Callable[[Path], Union[float, np.ndarray]],
            light_curve_path_tensor: tf.Tensor, evaluation_mode: bool = False,
            request_queue: Optional[Queue] = None,
            example_queue: Optional[Queue] = None
    ) -> (np.ndarray, np.ndarray):
        """
        Preprocesses an individual standard light curve from a light curve path tensor, using a passed function defining
        how to load the values from the light curve file and the label value to use. Designed to be used with `partial`
        to prepare a function which will just require the light curve path tensor, and can then be mapped to a dataset.

        :param load_times_fluxes_and_flux_errors_from_path_function: The function to load the light curve times and
                                                                     fluxes from a file.
        :param load_auxiliary_information_for_path_function: The function to load the auxiliary information to pass
                                                             alongside the light curve.
        :param load_label_from_path_function: The function to load the label to assign to the light curve.
        :param light_curve_path_tensor: The tensor containing the path to the light curve file.
        :param evaluation_mode: Whether or not the preprocessing should occur in evaluation mode (for repeatability).
        :param request_queue: The logging request queue.
        :param example_queue: The logging example queue.
        :return: The example and label arrays shaped for use as a single example for the network.
        """
        light_curve_path = Path(light_curve_path_tensor.numpy().decode('utf-8'))
        times, fluxes, flux_errors = load_times_fluxes_and_flux_errors_from_path_function(light_curve_path)
        if self.logger is not None and self.logger.should_produce_example(request_queue):
            light_curve = LightCurve.from_times_and_fluxes(times, fluxes)
            loggable_light_curve = WandbLoggableLightCurve(light_curve_name=light_curve_path.name,
                                                           light_curve=light_curve)
            self.logger.submit_loggable(example_queue, loggable_light_curve)
        light_curve = self.build_light_curve_array(fluxes=fluxes, times=times, flux_errors=flux_errors)
        example = self.preprocess_light_curve(light_curve, evaluation_mode=evaluation_mode)
        label = load_label_from_path_function(light_curve_path)
        label = self.expand_label_to_training_dimensions(label)
        if self.number_of_auxiliary_values > 0:
            auxiliary_information = load_auxiliary_information_for_path_function(light_curve_path)
            return example, auxiliary_information, label
        else:
            return example, label
Example #3
def preprocess(tensor: tf.Tensor) -> str:
    """
  Pre-process sequence of text for a translation task

  :param tensor: Eager tf.string tensor (can use .numpy() method)
  :return: str containing the pre-processed sequence
  """
    sentence = tensor.numpy().decode('UTF-8')
    # Converts to lowercase ascii representation
    sentence = unicode_to_ascii(sentence.lower().strip())

    # Creating a space between a word and the punctuation following it
    # eg: "he is a boy." => "he is a boy ."
    sentence = re.sub(r"([?.!,¿])", r" \1 ", sentence)
    sentence = re.sub(r'[" "]+', " ", sentence)

    # Replacing everything except (a-z, A-Z, ".", "?", "!", ",", "¿") with a space
    sentence = re.sub(r"[^a-zA-Z?.!,¿]+", " ", sentence)

    # Strip leading/trailing whitespace
    sentence = sentence.strip()
    return '<start> ' + sentence + ' <end>'
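To run this standalone, a minimal accent-stripping unicode_to_ascii can be assumed (a sketch in the spirit of the usual NFD-based helper, not necessarily the original):

import re
import unicodedata
import tensorflow as tf

def unicode_to_ascii(s: str) -> str:
    # Drop combining marks after NFD normalization to strip accents.
    return "".join(c for c in unicodedata.normalize("NFD", s)
                   if unicodedata.category(c) != "Mn")

print(preprocess(tf.constant("¿Puedo ir?")))  # <start> ¿ puedo ir ? <end>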
Example #4
def plot_heat_map(image: tf.Tensor, heatmap: tf.Tensor):
    # Set up the figure
    fig = plt.figure()
    axes = fig.subplots(1, 3)
    # Plot the original image (middle panel)
    axes[1].imshow(image)
    # Plot the heatmap (left panel)
    axes[0].imshow(heatmap.numpy())
    # Plot the image with per-pixel transparency driven by the heatmap (right panel)
    alpha = 255 * (tf.image.resize(tf.expand_dims(heatmap, axis=-1),
                                   [IMG_HEIGHT, IMG_WIDTH],
                                   antialias=True) / tf.reduce_max(heatmap))
    axes[2].imshow(
        tf.concat((image, tf.cast(alpha, dtype=image.dtype)), axis=-1))
    # Hide the axes ticks and frames
    axes[0].axis('off')
    axes[1].axis('off')
    axes[2].axis('off')
    plt.savefig(OUTPUTS /
                ('result_' +
                 datetime.datetime.now().strftime("%Y%m%d-%H%M%S") + '.png'))
    plt.show()
Example #5
def merge_tensor(tensor: tf.Tensor, alignment: List[List[int]]) -> tf.Tensor:
    """ Merge input sub-token attentions into token attentions. """
    def aggregate(a, fun):
        n = len(alignment)
        new = np.zeros(n)
        for i in range(n):
            new[i] = fun(a[alignment[i]])
        return new

    # For attention _to_ a split-up word, we sum up the attention weights
    # over its tokens. For attention _from_ a split-up word, we take the mean
    # of the attention weights over its tokens. In other words, we take the
    # mean over rows, and sum over columns of split tokens according to the
    # alignment. Note that an aggregation applied along one axis affects the
    # orthogonal dimension.
    x = tensor.numpy()
    attention_to = partial(aggregate, fun=np.mean)
    x = np.apply_along_axis(attention_to, 2, x)
    attention_from = partial(aggregate, fun=np.sum)
    x = np.apply_along_axis(attention_from, 3, x)
    x = tf.convert_to_tensor(x)
    return x
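A toy invocation, assuming a (layers, heads, from, to) attention tensor where sub-tokens 0 and 1 belong to the same word (shapes only; the values are random):

import tensorflow as tf

x = tf.random.uniform((1, 1, 3, 3))
merged = merge_tensor(x, alignment=[[0, 1], [2]])
print(merged.shape)  # (1, 1, 2, 2)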
Example #6
    def general_preprocessing(
            self, example_path_tensor: tf.Tensor) -> (tf.Tensor, tf.Tensor):
        """
        Loads and preprocesses the data.

        :param example_path_tensor: The tensor containing the path to the example to load.
        :return: The example and its corresponding label.
        """
        example_path = example_path_tensor.numpy().decode('utf-8')
        tess_data_interface = TessDataInterface()
        fluxes, times = tess_data_interface.load_fluxes_and_times_from_fits_file(
            example_path)
        fluxes = self.normalize(fluxes)
        time_differences = np.diff(times, prepend=times[0])
        example = np.stack([fluxes, time_differences], axis=-1)
        if self.is_positive(example_path):
            label = self.generate_label(example_path, times)
        else:
            label = np.zeros_like(fluxes)
        return tf.convert_to_tensor(
            example, dtype=tf.float32), tf.convert_to_tensor(label,
                                                             dtype=tf.float32)
Example #7
def train_single_epoch(model: tf.keras.Model,
                       anchors: tf.Tensor,
                       dataset: tf.data.Dataset,
                       optimizer: tf.optimizers.Optimizer,
                       loss_fn: LossFn,
                       epoch: int,
                       num_classes: int,
                       print_every: int = 10):

    running_loss = tf.metrics.Mean()
    running_clf_loss = tf.metrics.Mean()
    running_reg_loss = tf.metrics.Mean()

    for i, (images, (labels, bbs)) in enumerate(dataset):

        target_reg, target_clf = \
            utils.anchors.anchor_targets_bbox(anchors.numpy(),
                                              images.numpy(),
                                              bbs.numpy(),
                                              labels.numpy(),
                                              num_classes)

        reg_loss, clf_loss = _train_step(model=model,
                                         optimizer=optimizer,
                                         loss_fn=loss_fn,
                                         images=images,
                                         regress_targets=target_reg,
                                         labels=target_clf)

        running_loss(reg_loss + clf_loss)
        running_clf_loss(clf_loss)
        running_reg_loss(reg_loss)

        if (i + 1) % print_every == 0:
            print(f'Epoch[{epoch}] '
                  f'loss: {running_loss.result():.6f} '
                  f'clf. loss: {running_clf_loss.result():.6f} '
                  f'reg. loss: {running_reg_loss.result():.6f} ')
Example #8
def plot_results(distribution: FlowExample,
                 nhf: NHF,
                 samples: tf.Tensor,
                 granularity=100):
    # Define the right scale
    samples = samples.numpy()
    x_min, y_min = np.nanmin(samples, axis=0)
    x_max, y_max = np.nanmax(samples, axis=0)
    x_range = x_min, x_max
    y_range = y_min, y_max

    axes: List[plt.Axes]
    fig, axes = plt.subplots(1, 3, gridspec_kw=dict(width_ratios=[1, 1, 2]))

    # Make first axes share the same scale
    axes[0].get_shared_x_axes().join(axes[0], axes[1])
    axes[0].get_shared_y_axes().join(axes[0], axes[1])

    axes[0].set_title("Original distribution")
    distribution.plot(x_range,
                      y_range,
                      granularity=granularity,
                      show_samples=samples,
                      ax=axes[0],
                      show=False)

    axes[1].set_title("Predicted distribution")
    grid = nhf.grid_evaluation(x_range, y_range, granularity=granularity)
    axes[1].imshow(grid, extent=(*x_range, *y_range), cmap="coolwarm")

    axes[2].set_title("Losses")
    for k, v in nhf.history.items():
        numpy_values = [i.numpy() for i in v]
        axes[2].plot(numpy_values, label=k)
    plt.legend()

    plt.show()
    return axes
Example #9
def is_binary_image(string: tf.Tensor) -> Tuple[bool, str]:
    """Determine image compression type using a binary string tensor/object.

  Args:
    string: binary string, can be `tf.Tensor` or python format..

  Returns:
    a tuple containing a flag denoting whether input string is an image and the corresponding
      extension (if its an image, else empty).
  """
    if not isinstance(string, (bytes, tf.Tensor)):
        raise ValueError(
            f'Input {string} is not a bytes string or `tf.Tensor`.')
    if isinstance(string, tf.Tensor):
        string = string.numpy()
    if string.startswith(b'\xff\xd8\xff'):
        return True, 'jpg'
    elif string.startswith(b'\x89\x50\x4e\x47\x0d\x0a\x1a\x0a'):
        return True, 'png'
    elif string.startswith(b'BM'):  # BMP files start with ASCII "BM"
        return True, 'bmp'
    else:
        return False, ''
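Usage sketch with bytes read from disk (the file name is hypothetical); tf.io.read_file returns a string tensor that the function accepts as-is:

import tensorflow as tf

raw = tf.io.read_file("photo.png")
is_image, extension = is_binary_image(raw)
print(is_image, extension)  # True png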
Example #10
    def process_image_train(self, img_path: tf.Tensor, purpose):
        """ * Callback function for tf.data.Dataset.map to process each image file path.
            * For each image file path, it returns a corresponding (image, label) pair.
            * This is the parent function that wraps all other processing helpers.
            * This is the processing for the training data specifically.

            Params:
                img_path - tf.Tensor, representing the path to an image file
                purpose - "train" for training; "val" for validation; "test" for testing
            Returns:
                img, label - tuple of (tf.float32, tf.uint8) arrays representing the image and label arrays
        """

        label_path = self.get_label_path(img_path, purpose)

        input_image = self.read_nifti(img_path.numpy(), img_channels=3)
        input_label = self.read_nifti(label_path.numpy(), label=True)

        # Normalize image
        input_image = self.normalize(input_image)

        image_patch, label_patch = self.get_random_patch(
            input_image, input_label)

        if purpose == "train":
            # Augment Data
            image_patch, label_patch = self.augment_patch(
                image_patch, label_patch)

        # Make label binary for tumor region in question
        if self.tumor_region:
            label_patch = tf.where(
                label_patch >= TUMOR_REGIONS[self.tumor_region],
                tf.constant(1, dtype=tf.uint8), tf.constant(0, dtype=tf.uint8))

        return image_patch, label_patch
Example #11
def cx(parents: tf.Tensor) -> tf.Tensor:
    """
    Creates offspring from two parent arrays using Cycle Crossover operator

    :param parents: Tensor of two individual solutions to participate in crossover

    :return: offspring array being a combination of mother and father arrays
    """
    mother, father = parents.numpy()
    offspring = np.full((mother.shape[0]), -1)

    index = np.random.randint(mother.shape[0])

    while not np.isin(
            mother[index], offspring
    ):  # follow the cycle until reaching a gene already placed in the offspring
        offspring[index] = mother[index]
        index = int(np.where(mother == father[index])[0][0])  # position of father's gene in mother

    leftover_indices = np.where(
        offspring < 0)  # pick all empty indices in offspring
    offspring[leftover_indices] = father[leftover_indices]

    return tf.convert_to_tensor(offspring)
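A small sketch with two hand-picked parent permutations (any pair of permutations of the same length works):

import tensorflow as tf

parents = tf.constant([[0, 1, 2, 3, 4, 5],
                       [5, 4, 3, 2, 1, 0]])
print(cx(parents).numpy())  # a permutation mixing genes from both parents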
Example #12
    def add(self, ids: tf.Tensor, values: List[tf.Tensor],
            refs: List[str]) -> None:
        ref_ids_list: List[int] = [1]
        for ref in refs:
            ref_id = self._build_ref_lookup.get(ref)
            if ref_id is None:
                ref_id = len(self._build_ref_lookup) + 1
                self._build_ref_lookup[ref] = ref_id
            ref_ids_list.append(ref_id)
        ref_ids = tf.convert_to_tensor(ref_ids_list, dtype=tf.int32)

        value = tf.cast(tf.ragged.stack(values), tf.int32)
        np_ids = ids.numpy()
        cur_state: int = 0
        for i, id in enumerate(np_ids):
            next_state: int = self._build_matrix[cur_state, id]
            if next_state == 0:
                num_states = len(self._build_states)
                self._build_matrix.resize((num_states + 1, self._vocab_size))
                if i < len(np_ids) - 1:
                    cur_value = tf.RaggedTensor.from_tensor(
                        tf.zeros((0, 0), dtype=tf.int32),
                        row_splits_dtype=tf.int32)
                    cur_ref_ids = tf.convert_to_tensor([], dtype=tf.int32)
                else:
                    cur_value = value
                    cur_ref_ids = ref_ids
                self._build_states.append(cur_value)
                self._build_ref_ids.append(cur_ref_ids)
                self._num_states += 1
                next_state = num_states
                self._build_matrix[cur_state, id] = next_state
            elif i == len(np_ids) - 1:
                self._build_states[next_state] = value
                self._build_ref_ids[next_state] = ref_ids
            cur_state = next_state
Example #13
def _get_tensor_value(tensor_or_eager_tensor: tf.Tensor) -> Any:
    if ops.executing_eagerly_outside_functions():
        return tensor_or_eager_tensor.numpy()
    else:
        with tf.compat.v1.Session():
            return tensor_or_eager_tensor.eval()
Example #14
def representation_to_abstract_batch(t: tf.Tensor) -> T.List[T.Any]:
    return kdtree.query(t.numpy())[1].tolist()
Example #15
def gif_summary(name: str,
                data: tf.Tensor,
                fps: int,
                step: int = None,
                max_outputs=3):
    """Write a gif summary.

    Args:
        name: A name for this summary. The summary tag used for TensorBoard will
            be this name prefixed by any active name scopes.
        data: A 5-D `uint8` `Tensor` of shape `[k, time, height, width, channels]`
            where `k` is the number of gifs and `channels` is either 1 or 3.
            Any of the dimensions may be statically unknown (i.e., `None`).
            Floating point data will be clipped to the range [0,1).
        fps: frames per second of the gif.
        step: Explicit `int64`-castable monotonic step value for this summary. If
            omitted, this defaults to `tf.summary.experimental.get_step()`, which must
            not be None.
        max_outputs: Optional `int` or rank-0 integer `Tensor`. At most this
            many gifs will be emitted at each step. When more than
            `max_outputs` many gifs are provided, the first `max_outputs` many
            images will be used and the rest silently discarded.
    Returns:
        A scalar `Tensor` of type `string`. The serialized `Summary` protocol buffer.
    """
    if step is None:
        # Fall back to the default step, as the docstring promises.
        step = tf.summary.experimental.get_step()
    summary_scope = tf.summary.experimental.summary_scope(
        name=name,
        default_name='image_summary',
        values=[data, max_outputs, step])

    batch_size, length, height, width, channels = data.shape
    batch_size = min(batch_size, max_outputs)

    with summary_scope as (tag, _):
        tf.debugging.assert_rank(data, 5)

        summary = summary_pb2.Summary()

        if tf.executing_eagerly():
            data = data.numpy()
        else:
            session = tf.compat.v1.keras.backend.get_session()
            data = session.run(data)

        for i in range(batch_size):
            ith_image_summary = summary_pb2.Summary.Image()
            ith_image_summary.height = height
            ith_image_summary.width = width
            ith_image_summary.colorspace = channels

            try:
                ith_image_summary.encoded_image_string = encode_gif(
                    data[i], fps)
            except (IOError, OSError) as exception:
                raise IOError(
                    "Unable to encode images to a gif string because either ffmpeg is "
                    "not installed or ffmpeg returned an error: {}.".format(
                        repr(exception)))

            summary_tag = "{}/gif".format(tag) if (
                batch_size == 1) else "{}/gif/{}".format(tag, i)

            summary.value.add(tag=summary_tag, image=ith_image_summary)

        event = event_pb2.Event(summary=summary)
        event.wall_time = time.time()
        event.step = step

        summary_ops_v2.import_event(event.SerializeToString(), name="scope")
Example #16
def main(strategy: tf.distribute.MirroredStrategy, global_step: tf.Tensor,
         train_writer: tf.summary.SummaryWriter,
         eval_writer: tf.summary.SummaryWriter, train_batch_size: int,
         eval_batch_size: int, job_dir: str, dataset_dir: str,
         dataset_filename: str, num_epochs: int, summary_steps: int,
         log_steps: int, dataset_spec: DatasetSpec, model: tf.keras.Model,
         loss_fn: tf.keras.losses.Loss,
         optimizer: tf.keras.optimizers.Optimizer):
    # Define metrics
    eval_metric = tf.keras.metrics.CategoricalAccuracy()
    best_metric = tf.Variable(eval_metric.result())

    # Define training loop

    @distributed_run(strategy)
    def train_step(inputs):
        images, labels = inputs

        with tf.GradientTape() as tape:
            logits = model(images)

            cross_entropy = loss_fn(labels, logits)
            loss = tf.reduce_sum(cross_entropy) / train_batch_size

        # Compute and apply gradients outside the tape context so the
        # update ops themselves are not recorded.
        gradients = tape.gradient(loss, model.variables)
        optimizer.apply_gradients(zip(gradients, model.variables))

        if global_step % summary_steps == 0:
            tf.summary.scalar('loss', loss, step=global_step)

        return loss

    @distributed_run(strategy)
    def eval_step(inputs, metric):
        images, labels = inputs

        logits = model(images)

        metric.update_state(labels, logits)

    # Build input pipeline
    train_reader = Reader(dataset_dir, dataset_filename, split=Split.Train)
    test_reader = Reader(dataset_dir, dataset_filename, split=Split.Test)
    train_dataset = train_reader.read()
    test_dataset = test_reader.read()

    @unpack_dict
    def map_fn(_id, image, label):
        return tf.cast(image, tf.float32) / 255., label

    train_dataset = dataset_spec.parse(train_dataset).batch(
        train_batch_size).map(map_fn)
    test_dataset = dataset_spec.parse(test_dataset).batch(eval_batch_size).map(
        map_fn)

    #################
    # Training loop #
    #################
    # Define checkpoint
    checkpoint = tf.train.Checkpoint(optimizer=optimizer,
                                     model=model,
                                     global_step=global_step,
                                     best_metric=best_metric)
    # Restore the model
    checkpoint_dir = job_dir
    checkpoint_prefix = os.path.join(checkpoint_dir, 'ckpt')
    checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))

    # Prepare dataset for distributed run
    train_dataset = strategy.experimental_distribute_dataset(train_dataset)
    test_dataset = strategy.experimental_distribute_dataset(test_dataset)

    with CheckpointHandler(checkpoint, checkpoint_prefix):
        for epoch in range(num_epochs):
            print('---------- Epoch: {} ----------'.format(epoch + 1))

            print('Starting training for epoch: {}'.format(epoch + 1))
            with train_writer.as_default():
                for inputs in tqdm(train_dataset,
                                   initial=global_step.numpy(),
                                   desc='Training',
                                   unit=' steps'):
                    per_replica_losses = train_step(inputs)
                    mean_loss = strategy.reduce(tf.distribute.ReduceOp.SUM,
                                                per_replica_losses, None)

                    if global_step.numpy() % log_steps == 0:
                        print('Loss: {}'.format(mean_loss.numpy()))

                    # Increment global step
                    global_step.assign_add(1)

            print('Starting evaluation for epoch: {}'.format(epoch + 1))

            with eval_writer.as_default():
                for inputs in tqdm(test_dataset, desc='Evaluating'):
                    eval_step(inputs, eval_metric)

                accuracy = eval_metric.result()
                print('Accuracy: {}'.format(accuracy.numpy()))
                tf.summary.scalar('accuracy', accuracy, step=global_step)

                if accuracy >= best_metric:
                    checkpoint.save(file_prefix=checkpoint_prefix + '-best')
                    print('Saved new best model: accuracy {} is higher than {}'.format(
                        accuracy.numpy(), best_metric.numpy()))
                    best_metric.assign(accuracy)

            eval_metric.reset_states()
Example #17
def broadcast_to_sample_size(tensor: tf.Tensor, sample_size: int):
    # Use the static shape directly instead of materializing it via .numpy().
    reshaped_tensor = tf.reshape(tensor, shape=(1,) + tuple(tensor.shape))
    broadcast_tensor = tf.broadcast_to(reshaped_tensor,
                                       shape=(sample_size,) + tuple(tensor.shape))
    return broadcast_tensor
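A quick shape check (values are arbitrary):

import tensorflow as tf

t = tf.constant([1.0, 2.0, 3.0])
print(broadcast_to_sample_size(t, sample_size=4).shape)  # (4, 3)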
Example #18
def _calculate_counts(decimal_samples: tf.Tensor) -> Tuple[np.ndarray, np.ndarray]:
    return np.unique(decimal_samples.numpy(), return_counts=True)
Example #19
def _stringify(scalar: tf.Tensor) -> str:
    """Converts scalar tensor into a Python string."""

    val = scalar.numpy()
    return val.decode('utf-8') if isinstance(val, bytes) else str(val)
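Works for both string and numeric scalars, e.g.:

import tensorflow as tf

print(_stringify(tf.constant("abc")))  # abc
print(_stringify(tf.constant(3)))      # 3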
Example #20
def _(self, tensor: tf.Tensor):
    self.allgatherer.start(tensor.numpy().flatten())
Example #21
def _(self, tensor: tf.Tensor):
    self.allgatherer = tnt.Allgatherv(group=self.group,
                                      nelems=int(np.prod(tensor.shape)),
                                      algorithm=self.algorithm,
                                      dtype=tensor.numpy().dtype)
Example #22
def upper_case_fn(t: tf.Tensor):
    return t.numpy().decode('utf-8').upper()
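Such helpers are typically wrapped in tf.py_function when mapped over a dataset, since .numpy() exists only on eager tensors; a sketch:

import tensorflow as tf

ds = tf.data.Dataset.from_tensor_slices(["hello", "world"])
ds = ds.map(lambda s: tf.py_function(upper_case_fn, [s], tf.string))
for s in ds:
    print(s.numpy())  # b'HELLO', then b'WORLD'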
Example #23
def str_tensor_to_str(str_tensor: tf.Tensor) -> str:
    return str_tensor.numpy().decode('utf-8')
Example #24
def tensor_to_numpy(tensor: tf.Tensor):
    return tensor.numpy()
Example #25
def tolist(t: tf.Tensor) -> List:
    return t.numpy().tolist()
Example #26
def save_tensor_as_image(tensor: tf.Tensor, path: str):
    """Saves a tensor to the given path as a grayscale image.
    """
    image = Image.fromarray(tensor.numpy())
    image.convert(mode="L").save(path)
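Usage sketch (the output path is hypothetical; the tensor must be convertible by PIL, e.g. 2-D uint8):

import tensorflow as tf

save_tensor_as_image(tf.zeros((8, 8), dtype=tf.uint8), "black.png")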
Example #27
def tolist(t: tf.Tensor) -> List:
    if isinstance(t, tf.Tensor):
        t = t.numpy()
    return t.tolist()
Example #28
def _to_float(x: tf.Tensor) -> float:
    return x.numpy().item()
Example #29
def upper_case_fn(t: tf.Tensor):
    return t.numpy() + placeholder_lang[t.numpy()]
Example #30
def _as_text_list(value: tf.Tensor) -> List[Text]:
    return [b.decode('utf-8') for b in value.numpy().tolist()]
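Sketch for a batch of string scalars:

import tensorflow as tf

batch = tf.constant(["alpha", "beta"])
print(_as_text_list(batch))  # ['alpha', 'beta']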