def _set_name(self, element_spec):
     result = {}
     for key in element_spec:
         result[key] = TensorSpec(shape=element_spec[key].shape,
                                  dtype=element_spec[key].dtype,
                                  name=key)
     return result
def _get_subgraphed_test_ds_spec(neighbourhood_size):
    """Return the element spec dict for the subgraphed test dataset.

    Every entry is an unnamed float32 ``TensorSpec``.  Compared with a plain
    per-sample layout, the adjacency, edge-feature and stacked-base-feature
    entries carry ``neighbourhood_size`` as an explicit dimension after a
    leading unknown (``None``) dimension.
    """
    k = neighbourhood_size

    def float_spec(*dims):
        # All entries share dtype and (absent) name; only the shape varies.
        return TensorSpec(shape=dims, dtype=tf.float32, name=None)

    return {
        'sequence': float_spec(None, 4),
        'structure': float_spec(None, 3),
        'predicted_loop_type': float_spec(None, 7),
        'adjacency_matrix': float_spec(None, k, k),
        'edges_features_matrix': float_spec(None, k, k, 3),
        'seq_scored': float_spec(),  # scalar: shape ()
        'stacked_base_features': float_spec(None, k, 14),
    }
Esempio n. 3
0
def define_tfrecord_signature(
    model,
    tfrecord_type: str,
    feature_config: FeatureConfig,
    preprocessing_keys_to_fns: dict,
    postprocessing_fn=None,
    required_fields_only: bool = True,
    pad_sequence: bool = False,
    max_sequence_size: int = 0,
):
    """
    Build the TFRecord serving function for the tf keras savedmodel

    The returned function parses every serialized proto of its 1-D string
    input with the parse function obtained from get_parse_fn, stacks the
    parsed features, calls the model on them and, when postprocessing_fn is
    given, passes the predictions and the stacked features through it.

    Returns:
        Serving signature function that accepts a TFRecord string tensor and returns predictions
    """

    # NOTE:
    # Setting pad_sequence=False for tfrecord signature as it is used at inference time
    # and we do NOT want to score on padded records for performance reasons
    #
    # Limitation: This limits the serving signature to only run inference on a single query
    # at a time given the current implementation. This is a tricky issue to fix because
    # there is no real way to generate a dense tensor of ranking scores from different queries,
    # as they might have varying number of records in each of them.
    #
    # Workaround: To infer on multiple queries, run predict() on each of the queries separately.

    # Node names of all non-label features; one TensorArray is kept per name
    node_names = feature_config.get_all_features(key="node_name", include_label=False)

    # Parsing function applied to one serialized proto at a time
    parse_proto = get_parse_fn(
        feature_config=feature_config,
        tfrecord_type=tfrecord_type,
        preprocessing_keys_to_fns=preprocessing_keys_to_fns,
        max_sequence_size=max_sequence_size,
        required_fields_only=required_fields_only,
        pad_sequence=pad_sequence,
    )

    # node name (falling back to the feature name) -> dtype of the parsed feature
    dtype_map = {
        info.get("node_name", info["name"]): feature_config.get_dtype(info)
        for info in feature_config.get_all_features(include_label=False)
    }

    # NOTE: the argument name `protos` and the function name are kept as-is;
    # they belong to the traced signature.
    @tf.function(input_signature=[TensorSpec(shape=[None], dtype=tf.string)])
    def _serve_tfrecord(protos):
        num_protos = tf.shape(protos)[0]
        feature_arrays = {
            name: TensorArray(dtype=dtype_map[name], size=num_protos) for name in node_names
        }

        def has_remaining(idx, protos, arrays):
            # Keep looping until every proto has been parsed
            return tf.less(idx, num_protos)

        def parse_and_store(idx, protos, arrays):
            # Labels returned by the parse function are not needed for serving
            parsed_features, _ = parse_proto(protos[idx])
            for name, value in parsed_features.items():
                arrays[name] = arrays[name].write(idx, value)

            return idx + 1, protos, arrays

        # Parse the protos one by one, collecting each feature in its TensorArray
        _, _, feature_arrays = tf.while_loop(
            cond=has_remaining,
            body=parse_and_store,
            loop_vars=[tf.constant(0), protos, feature_arrays],
        )

        # Turn each TensorArray into a single stacked tensor
        stacked_features = {name: array.stack() for name, array in feature_arrays.items()}

        predictions = model(inputs=stacked_features)

        # Optional post hook on the raw predictions
        if postprocessing_fn:
            predictions = postprocessing_fn(predictions, stacked_features)

        return predictions

    return _serve_tfrecord
Esempio n. 4
0
# Save the ONNX model object to the path given by ONNX_MODEL_FILE.
keras2onnx.save_model(onnx_model, ONNX_MODEL_FILE)

################################################################################
# Save: Save the model to a frozen TensorFlow model.
#
# Steps: wrap the Keras model in a tf.function, obtain a concrete function for
# the model's input spec, convert its variables to constants, and write the
# resulting GraphDef to disk.
#

from tensorflow import TensorSpec
from tensorflow.io import write_graph
from tensorflow.python.framework.convert_to_constants import convert_variables_to_constants_v2

print()
print(f"Saving a frozen TensorFlow model:")

# Convert the Keras model to a concrete function.
# The input spec is taken from the model's first input only (model.inputs[0]).
spec = TensorSpec(shape=model.inputs[0].shape, dtype=model.inputs[0].dtype)
full_model = tf.function(lambda x: model(x)).get_concrete_function(spec)

# Freeze that concrete function and extract its graph definition.
frozen_func = convert_variables_to_constants_v2(full_model)
graph_def = frozen_func.graph.as_graph_def()

# Save the frozen graph to disk: written under the current directory ('.')
# with file name TF_MODEL_FILE, in binary form (as_text=False).
write_graph(graph_or_graph_def=graph_def,
            logdir='.',
            name=TF_MODEL_FILE,
            as_text=False)

# Report the frozen function's input and output tensors.
print(f"Frozen model input node:  {frozen_func.inputs}")
print(f"Frozen model output node: {frozen_func.outputs}")
Esempio n. 5
0
 def _check_signature(spec: tf.TensorSpec, value):
   """Assert that `value`, once converted to a tensor, is compatible with `spec`."""
   # Going through np.asarray first turns plain Python ints/floats into numpy
   # arrays (typically np.int64 / np.float64) before the tensor conversion.
   as_tensor = tf.convert_to_tensor(np.asarray(value))
   self.assertTrue(spec.is_compatible_with(as_tensor))
def _get_subgraphed_train_ds_spec(neighbourhood_size):
    """Return the ``(features_spec, labels_spec)`` pair for the subgraphed train dataset.

    The feature part is whatever ``_get_subgraphed_test_ds_spec`` returns for
    ``neighbourhood_size``; the label part holds a single unnamed float32
    ``'stacked_scored_labels'`` spec of shape ``(None, 3)``.
    """
    features_spec = _get_subgraphed_test_ds_spec(neighbourhood_size)
    labels_spec = {
        'stacked_scored_labels': TensorSpec(shape=(None, 3), dtype=tf.float32, name=None),
    }
    return features_spec, labels_spec
# Dataset locations, derived from DATASETS_DIR.
PRIVATE_TEST_DS_PATH = os.path.join(DATASETS_DIR, 'private_test_ds')
SUBGRAPHED_DATASETS_DIR = os.path.join(DATASETS_DIR, 'subgraphed')

# Submission files live next to the script, under SCRIPT_DIR.
SUBMISSIONS_DIR = os.path.join(SCRIPT_DIR, 'submissions')
SAMPLE_SUBMISSION_PATH = os.path.join(SUBMISSIONS_DIR, 'sample_submission.csv')

# Feature keys.  Note: TEST_DS_SPEC below additionally contains
# 'stacked_base_features', which is not listed here.
FEATURE_NAMES = [
    'sequence',
    'structure',
    'predicted_loop_type',
    'adjacency_matrix',
    'edges_features_matrix',
    'seq_scored',
]

# Label keys: the "*_error" columns, the plain columns, and their concatenation.
ERROR_LABEL_NAMES = [
    'reactivity_error',
    'deg_error_Mg_pH10',
    'deg_error_pH10',
    'deg_error_Mg_50C',
    'deg_error_50C',
]
NORMAL_LABEL_NAMES = [
    'reactivity',
    'deg_Mg_pH10',
    'deg_pH10',
    'deg_Mg_50C',
    'deg_50C',
]
ALL_LABEL_NAMES = [*ERROR_LABEL_NAMES, *NORMAL_LABEL_NAMES]

# Subset of the plain label columns (three entries, matching the width of
# the 'stacked_scored_labels' spec in TRAIN_DS_SPEC).
SCORED_LABEL_NAMES = ['reactivity', 'deg_Mg_pH10', 'deg_Mg_50C']

SUBGRAPH_OPERATION_BATCH_SIZE = 64

# Element spec of the test dataset: unnamed float32 specs keyed by feature.
TEST_DS_SPEC = {
    'sequence': TensorSpec(shape=(None, 4), dtype=tf.float32, name=None),
    'structure': TensorSpec(shape=(None, 3), dtype=tf.float32, name=None),
    'predicted_loop_type': TensorSpec(shape=(None, 7), dtype=tf.float32, name=None),
    'adjacency_matrix': TensorSpec(shape=(None, None), dtype=tf.float32, name=None),
    'edges_features_matrix': TensorSpec(shape=(None, None, 3), dtype=tf.float32, name=None),
    'seq_scored': TensorSpec(shape=(), dtype=tf.float32, name=None),
    'stacked_base_features': TensorSpec(shape=(None, 14), dtype=tf.float32, name=None),
}

# Element spec of the train dataset: (features spec, labels spec).
TRAIN_DS_SPEC = (
    TEST_DS_SPEC,
    {'stacked_scored_labels': TensorSpec(shape=(None, 3), dtype=tf.float32, name=None)},
)


def _get_subgraphed_test_ds_spec(neighbourhood_size):
    return ({'sequence': TensorSpec(shape=(None, 4), dtype=tf.float32, name=None),
             'structure': TensorSpec(shape=(None, 3), dtype=tf.float32, name=None),
             'predicted_loop_type': TensorSpec(shape=(None, 7), dtype=tf.float32, name=None),
             'adjacency_matrix': TensorSpec(shape=(None, neighbourhood_size, neighbourhood_size), dtype=tf.float32,
Esempio n. 8
0
    def _build_saved_model_signatures(self):
        """
        Build the serving signatures for the tf keras savedmodel

        Returns a dictionary with a single entry: ServingSignatureKey.TFRECORD
        mapped to a tf.function that takes a 1-D string tensor of serialized
        protos, parses them one at a time, runs self.model on the stacked
        float32 features and zeroes the predictions wherever the "mask"
        feature equals 0.
        """

        # TODO: Define a default signature (including its input_signature).
        # The sketch that used to be commented out here cast every incoming
        # feature to tf.float32, ran self.model on them and replaced the
        # predictions at positions where features_dict['mask'] == 0 with -inf.
        # Until that exists, only the TFRecord signature is returned below.

        # TFRecord Signature
        # Node names of the non-label features and the per-proto parsing function
        node_names = self.feature_config.get_all_features(
            key="node_name", include_label=False)
        parse_proto = make_parse_fn(
            feature_config=self.feature_config,
            max_num_records=self.max_num_records)

        # NOTE: the argument name `sequence_example_protos` and the function
        # name are kept as-is; they belong to the traced signature.
        @tf.function(
            input_signature=[TensorSpec(shape=[None], dtype=tf.string)])
        def _serve_tfrecord(sequence_example_protos):
            num_protos = tf.shape(sequence_example_protos)[0]

            # One float32 TensorArray per feature, one slot per input proto
            feature_arrays = {
                name: TensorArray(dtype=tf.float32, size=num_protos)
                for name in node_names
            }

            def has_remaining(idx, sequence_example_protos, arrays):
                return tf.less(idx, num_protos)

            def parse_and_store(idx, sequence_example_protos, arrays):
                # TODO: Modify parse_fn from
                # parse_single_sequence_example -> parse_sequence_example
                # to handle a batch of TFRecord proto
                parsed_features, _ = parse_proto(sequence_example_protos[idx])
                for name, value in parsed_features.items():
                    # Every feature is stored as float32, whatever its parsed dtype
                    arrays[name] = arrays[name].write(
                        idx, tf.cast(value, tf.float32))

                return idx + 1, sequence_example_protos, arrays

            # Parse all SequenceExample protos to get features
            _, _, feature_arrays = tf.while_loop(
                cond=has_remaining,
                body=parse_and_store,
                loop_vars=[tf.constant(0), sequence_example_protos, feature_arrays],
            )

            # Convert each TensorArray to a single stacked tensor
            features_dict = {
                name: array.stack() for name, array in feature_arrays.items()
            }

            # Run the model to get predictions
            predictions = self.model(inputs=features_dict)

            # Mask the padded records: where mask == 0 the prediction becomes 0.0
            for output_name in predictions:
                predictions[output_name] = tf.where(
                    tf.equal(features_dict["mask"], 0),
                    tf.constant(0.0),
                    predictions[output_name])

            return predictions

        signatures = {
            # ServingSignatureKey.DEFAULT is not available yet (see TODO above)
            ServingSignatureKey.TFRECORD: _serve_tfrecord,
        }
        return signatures
Esempio n. 9
0
 def _check_signature(spec: tf.TensorSpec, value: np.ndarray):
     """Assert that `value`, converted to a tensor, is compatible with `spec`."""
     as_tensor = tf.convert_to_tensor(value)
     self.assertTrue(spec.is_compatible_with(as_tensor))
Esempio n. 10
0
    def learn(self):
        """Placeholder: no learning step is implemented yet; returns None."""
        return None

    def get_epsilon(self, frame_number):
        """Return the epsilon value for ``frame_number`` from a three-phase schedule.

        * before ``replay_buffer_start_size`` frames: the constant ``eps_initial``;
        * for the next ``eps_annealing_frames`` frames: the line
          ``slope * frame_number + intercept``;
        * from then on: the line ``slope_2 * frame_number + intercept_2``.

        If ``frame_number`` matches none of the comparisons (e.g. NaN), ``None``
        is returned.
        """
        anneal_start = self.replay_buffer_start_size
        if frame_number < anneal_start:
            return self.eps_initial

        anneal_end = anneal_start + self.eps_annealing_frames
        if anneal_start <= frame_number < anneal_end:
            return self.slope * frame_number + self.intercept
        if frame_number >= anneal_end:
            return self.slope_2 * frame_number + self.intercept_2
        return None


if __name__ == "__main__":

    # Spec of one stored record as a 5-tuple of named TensorSpecs:
    # (state_t, action, reward, state_t1, terminal_flag).
    # NOTE(review): presumably one replay-buffer transition with 84x84 uint8
    # frames — confirm against the buffer implementation.
    data_spec = (
        TensorSpec(shape=(84, 84), dtype=tf.uint8, name='state_t'),
        TensorSpec(shape=(), dtype=tf.uint8, name='action'),
        TensorSpec(shape=(), dtype=tf.float32, name='reward'),
        TensorSpec(shape=(84, 84), dtype=tf.uint8, name='state_t1'),
        TensorSpec(shape=(), dtype=tf.bool, name='terminal_flag'),
    )

    # Sizes
    capacity = 10000
    batch_size = 32

    # Run-length limits
    MAX_EPOCHS = 100
    MAX_EPOCH_FRAME = 10000
    MAX_EPISODE_LENGTH = 1000

    # Environment first, then the agent (same construction order as before)
    ENV_NAME = "hello world"
    env = GameEnvironment(ENV_NAME)

    agent = Agent()
Esempio n. 11
0
            shape=(None, emb_feature_group_dim), dtype=tf.float32, name=None)
        for f in range(emb_feature_groups_dim)))
    print(embedding_feature_groups_specs)

    combined_feature_mappings = {
        **feature_mappings,
        **embedding_feature_mappings
    }
    print(combined_feature_mappings)

    for t in embedding_feature_groups_specs:
        feature_groups_specs += (t, )
    print('combined: ' + str(feature_groups_specs))

    tensor_spec = (
        TensorSpec(shape=(), dtype=tf.int64, name=None
                   ),  #enum for sharding, to be removed after loading the data
        # Keras Input: (inputs, targets, sample_weights)
        (
            feature_groups_specs,  #Feature groups, inputs
            TensorSpec(shape=(None, 1), dtype=tf.float32,
                       name=None),  #labels, targets
            TensorSpec(shape=(None, 1), dtype=tf.float32,
                       name=None)))  #weights, sample_weights
    print(tensor_spec)

    feature_groups_types = (tuple(tf.float32
                                  for f in range(feature_groups_dim +
                                                 emb_feature_groups_dim)))
    print(feature_groups_types)

    numbers = range(feature_group_dim)
Esempio n. 12
0
def Test_distribute_datasets():
    """Grade ``learner_mod.distribute_datasets``.

    Loads ``grader_dataset`` from disk, splits it into train / validation /
    test parts, batches them and passes the batches to
    ``learner_mod.distribute_datasets`` together with a ``MirroredStrategy``.

    Checks, in order:

    1. each of the three returned objects is exactly a ``DistributedDataset``
       (the first mismatch is reported on its own);
    2. each distributed dataset yields as many elements as the corresponding
       batched dataset has.

    Returns:
        tuple: ``(failed_cases, num_cases)`` where ``failed_cases`` is a list
        of case dicts (``name`` / ``expected`` / ``got``) and ``num_cases`` is
        the number of cases that were evaluated.
    """
    BATCH_SIZE_PER_REPLICA = 5

    strategy = tf.distribute.MirroredStrategy()

    # download grader_dataset from https://drive.google.com/file/d/19R6PumfmXkRtm8Pmm8tbXfQ3O80CV_hw/view?usp=sharing
    grader_dataset = tf.data.experimental.load(
        "grader_dataset",
        (TensorSpec(shape=(None, None, 3), dtype=tf.uint8, name=None),
         TensorSpec(shape=(), dtype=tf.int64, name=None)))

    train_examples = grader_dataset.take(80)
    validation_examples = grader_dataset.skip(80).take(10)
    # Everything after the first 90 examples, the same split as in
    # Test_distributed_train_test_step_fns.  This used to be
    # validation_examples.skip(10), which skips all 10 validation examples
    # and therefore always produced an empty test set.
    test_examples = grader_dataset.skip(90)

    def format_image(image, label):
        # Resize to 224x224 and scale pixel values by 1/255
        image = tf.image.resize(image, (224, 224)) / 255.0
        return image, label

    train_batches = train_examples.shuffle(
        80 // 4).map(format_image).batch(BATCH_SIZE_PER_REPLICA).prefetch(1)
    validation_batches = validation_examples.map(format_image).batch(
        BATCH_SIZE_PER_REPLICA).prefetch(1)
    test_batches = test_examples.map(format_image).batch(1)

    train_dist_dataset, val_dist_dataset, test_dist_dataset = learner_mod.distribute_datasets(
        strategy, train_batches, validation_batches, test_batches)

    # (label, distributed dataset, source batches) for each split
    splits = (
        ("train", train_dist_dataset, train_batches),
        ("val", val_dist_dataset, validation_batches),
        ("test", test_dist_dataset, test_batches),
    )

    # Type checks: stop at the first object that is not a DistributedDataset,
    # since the length checks below would be meaningless for it.
    for label, dist_dataset, _ in splits:
        if type(dist_dataset) is not DistributedDataset:
            failed_cases = [{
                "name": f"{label}_dist_dataset_type_check",
                "expected": DistributedDataset,
                "got": type(dist_dataset)
            }]
            return failed_cases, 1

    # Length checks: each case now carries the name of the split it checks
    # (previously all three were labelled "train_dist_dataset_len_check").
    test_cases = [
        {
            "name": f"{label}_dist_dataset_len_check",
            "got": len(list(dist_dataset)),
            "expected": len(batches)
        }
        for label, dist_dataset, batches in splits
    ]

    failed_cases = get_failed_cases(test_cases)

    return failed_cases, len(test_cases)
Esempio n. 13
0
def Test_distributed_train_test_step_fns():
    """Grade ``learner_mod.distributed_train_test_step_fns``.

    Builds distributed train / validation / test datasets from
    ``grader_dataset``, creates a small model, loss and metrics inside the
    strategy scope, obtains the per-replica step functions from
    ``solution_mod.train_test_step_fns`` and hands them to the learner's
    ``distributed_train_test_step_fns``.

    Checks that both returned step functions are callable, then runs one
    train step, one test step and a second train step on the first batch of
    the respective dataset and verifies the type, shape and dtype of the
    train result and that the accumulated test loss is a positive
    ``EagerTensor``.

    Returns:
        tuple: ``(failed_cases, num_cases)`` where ``failed_cases`` is a list
        of case dicts (``name`` / ``expected`` / ``got``) and ``num_cases`` is
        the number of cases that were evaluated.
    """
    strategy = tf.distribute.MirroredStrategy()

    BATCH_SIZE_PER_REPLICA = 5
    GLOBAL_BATCH_SIZE = BATCH_SIZE_PER_REPLICA * strategy.num_replicas_in_sync

    grader_dataset = tf.data.experimental.load(
        "grader_dataset",
        (TensorSpec(shape=(None, None, 3), dtype=tf.uint8, name=None),
         TensorSpec(shape=(), dtype=tf.int64, name=None)))

    train_examples = grader_dataset.take(80)
    validation_examples = grader_dataset.skip(80).take(10)
    test_examples = grader_dataset.skip(90)

    def format_image(image, label):
        # Resize to 224x224 and scale pixel values by 1/255
        image = tf.image.resize(image, (224, 224)) / 255.0
        return image, label

    # Batch size spelled via the constant instead of a repeated literal 5
    train_batches = train_examples.shuffle(
        80 // 4).map(format_image).batch(BATCH_SIZE_PER_REPLICA).prefetch(1)
    validation_batches = validation_examples.map(format_image).batch(
        BATCH_SIZE_PER_REPLICA).prefetch(1)
    test_batches = test_examples.map(format_image).batch(1)

    train_dist_dataset, validation_dist_dataset, test_dist_dataset = learner_mod.distribute_datasets(
        strategy, train_batches, validation_batches, test_batches)

    MODULE_HANDLE = 'data/resnet_50_feature_vector'

    class ResNetModel(tf.keras.Model):
        """Frozen hub feature extractor followed by a softmax classifier."""

        def __init__(self, classes):
            super().__init__()
            self._feature_extractor = hub.KerasLayer(MODULE_HANDLE,
                                                     trainable=False)
            self._classifier = tf.keras.layers.Dense(classes,
                                                     activation='softmax')

        def call(self, inputs):
            x = self._feature_extractor(inputs)
            x = self._classifier(x)
            return x

    def first_batch(dist_dataset):
        # Pull only the first element instead of materialising the whole
        # distributed dataset with list(...)[0].
        return next(iter(dist_dataset))

    with strategy.scope():

        model = ResNetModel(classes=102)

        optimizer = tf.keras.optimizers.Adam()

        # Reduction is left to compute_loss, which averages over the global batch
        loss_object = tf.keras.losses.SparseCategoricalCrossentropy(
            reduction=tf.keras.losses.Reduction.NONE)

        def compute_loss(labels, predictions):
            per_example_loss = loss_object(labels, predictions)
            return tf.nn.compute_average_loss(
                per_example_loss, global_batch_size=GLOBAL_BATCH_SIZE)

        test_loss = tf.keras.metrics.Mean(name='test_loss')
        train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
            name='train_accuracy')
        test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
            name='test_accuracy')

        train_step, test_step = solution_mod.train_test_step_fns(
            strategy, model, compute_loss, optimizer, train_accuracy,
            loss_object, test_loss, test_accuracy)

        distributed_train_step, distributed_test_step = learner_mod.distributed_train_test_step_fns(
            strategy, train_step, test_step, model, compute_loss, optimizer,
            train_accuracy, loss_object, test_loss, test_accuracy)

        if not callable(distributed_train_step):
            failed_cases = [{
                "name": "distributed_train_step_callable_check",
                "expected": True,
                "got": False
            }]
            return failed_cases, 1

        if not callable(distributed_test_step):
            failed_cases = [{
                "name": "distributed_test_step_callable_check",
                "expected": True,
                "got": False
            }]
            return failed_cases, 1

        train_result = distributed_train_step(first_batch(train_dist_dataset))

        distributed_test_step(first_batch(test_dist_dataset))

        test_loss_result1 = test_loss.result()

        # Second train step: its result is not inspected, it only has to run
        distributed_train_step(first_batch(train_dist_dataset))

        test_cases = [
            {
                "name": "train_result_type_check",
                "got": type(train_result),
                "expected": EagerTensor
            },
            {
                "name": "train_result_shape_check",
                "got": train_result.shape,
                "expected": ()
            },
            {
                "name": "train_result_dtype_check",
                "got": train_result.dtype,
                "expected": tf.float32
            },
            {
                "name": "test_loss_result_type_check",
                "got": type(test_loss_result1),
                "expected": EagerTensor
            },
            {
                "name": "test_loss_greater_than_zero",
                "got": test_loss_result1.numpy() > 0,
                "expected": True
            },
        ]

        failed_cases = get_failed_cases(test_cases)

        return failed_cases, len(test_cases)
Esempio n. 14
0
def define_tfrecord_signature(
    model,
    tfrecord_type: str,
    feature_config: FeatureConfig,
    preprocessing_keys_to_fns: dict,
    postprocessing_fn=None,
    required_fields_only: bool = True,
    pad_sequence: bool = False,
    max_sequence_size: int = 0,
):
    """
    Build the TFRecord serving signature for a keras model trained as a RelevanceModel

    The returned function first parses the serialized TFRecord protos (including
    any additional feature preprocessing), then runs the model on the parsed
    features and finally applies the optional postprocessing function.

    Parameters
    ----------
    model : keras Model
        Keras model object to be saved
    tfrecord_type : {"example", "sequence_example"}
        Type of the TFRecord protobuf that the saved model will be used on at serving time
    feature_config : `FeatureConfig` object
        FeatureConfig object that defines the input features into the model
        and the corresponding feature preprocessing functions to be used
        in the serving signature
    preprocessing_keys_to_fns : dict
        Dictionary mapping function names to tf.functions that should be saved
        in the preprocessing step of the tfrecord serving signature
    postprocessing_fn : function
        Custom tensorflow compatible postprocessing function to be used at serving time.
        It is called with the predictions and the dictionary of stacked features
        and is saved as part of the tfrecord serving signature
    required_fields_only : bool
        Whether only required fields need to be added to the
        tfrecord parsing function at serving time
    pad_sequence : bool, optional
        Whether sequences should be padded for SequenceExample proto inputs at serving time.
        Set this to False if you want to not handle padded scores.
    max_sequence_size : int, optional
        Maximum sequence size for SequenceExample protobuf
        The protobuf object will be padded or clipped to this value

    Returns
    -------
    `tf.function`
        Serving signature function that accepts a TFRecord string tensor and returns predictions
    """

    # NOTE:
    # Setting pad_sequence=False for tfrecord signature as it is used at inference time
    # and we do NOT want to score on padded records for performance reasons
    #
    # Limitation: This limits the serving signature to only run inference on a single query
    # at a time given the current implementation. This is a tricky issue to fix because
    # there is no real way to generate a dense tensor of ranking scores from different queries,
    # as they might have varying number of records in each of them.
    #
    # Workaround: To infer on multiple queries, run predict() on each of the queries separately.

    # Node names of all non-label features; one TensorArray is kept per name
    node_names = feature_config.get_all_features(key="node_name", include_label=False)

    # Parsing function applied to one serialized proto at a time
    parse_proto = get_parse_fn(
        feature_config=feature_config,
        tfrecord_type=tfrecord_type,
        preprocessing_keys_to_fns=preprocessing_keys_to_fns,
        max_sequence_size=max_sequence_size,
        required_fields_only=required_fields_only,
        pad_sequence=pad_sequence,
    )

    # node name (falling back to the feature name) -> dtype of the parsed feature
    dtype_map = {
        info.get("node_name", info["name"]): feature_config.get_dtype(info)
        for info in feature_config.get_all_features(include_label=False)
    }

    # NOTE: the argument name `protos` and the function name are kept as-is;
    # they belong to the traced signature.
    @tf.function(input_signature=[TensorSpec(shape=[None], dtype=tf.string)])
    def _serve_tfrecord(protos):
        num_protos = tf.shape(protos)[0]
        feature_arrays = {
            name: TensorArray(dtype=dtype_map[name], size=num_protos)
            for name in node_names
        }

        def has_remaining(idx, protos, arrays):
            # Keep looping until every proto has been parsed
            return tf.less(idx, num_protos)

        def parse_and_store(idx, protos, arrays):
            # Labels returned by the parse function are not needed for serving
            parsed_features, _ = parse_proto(protos[idx])
            for name, value in parsed_features.items():
                arrays[name] = arrays[name].write(idx, value)

            return idx + 1, protos, arrays

        # Parse the protos one by one, collecting each feature in its TensorArray
        _, _, feature_arrays = tf.while_loop(
            cond=has_remaining,
            body=parse_and_store,
            loop_vars=[tf.constant(0), protos, feature_arrays],
        )

        # Turn each TensorArray into a single stacked tensor
        stacked_features = {
            name: array.stack() for name, array in feature_arrays.items()
        }

        predictions = model(inputs=stacked_features)

        # Optional post hook on the raw predictions
        if postprocessing_fn:
            predictions = postprocessing_fn(predictions, stacked_features)

        return predictions

    return _serve_tfrecord