def _set_name(self, element_spec):
    result = {}
    for key in element_spec:
        result[key] = TensorSpec(shape=element_spec[key].shape,
                                 dtype=element_spec[key].dtype,
                                 name=key)
    return result
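# Hedged usage sketch for _set_name: given a tf.data.Dataset whose element_spec
# is a dict of unnamed TensorSpecs, the helper attaches each dict key as the
# spec's name. The dataset below is hypothetical, built only for illustration;
# `self` is unused by the helper, so None is passed in its place.
import tensorflow as tf

ds = tf.data.Dataset.from_tensors({'sequence': tf.zeros((8, 4), tf.float32)})
named_spec = _set_name(None, ds.element_spec)
print(named_spec['sequence'])  # TensorSpec(shape=(8, 4), dtype=tf.float32, name='sequence')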
def _get_subgraphed_test_ds_spec(neighbourhood_size):
    return ({
        'sequence': TensorSpec(shape=(None, 4), dtype=tf.float32, name=None),
        'structure': TensorSpec(shape=(None, 3), dtype=tf.float32, name=None),
        'predicted_loop_type': TensorSpec(shape=(None, 7), dtype=tf.float32, name=None),
        'adjacency_matrix': TensorSpec(shape=(None, neighbourhood_size, neighbourhood_size),
                                       dtype=tf.float32, name=None),
        'edges_features_matrix': TensorSpec(shape=(None, neighbourhood_size, neighbourhood_size, 3),
                                            dtype=tf.float32, name=None),
        'seq_scored': TensorSpec(shape=(), dtype=tf.float32, name=None),
        'stacked_base_features': TensorSpec(shape=(None, neighbourhood_size, 14),
                                            dtype=tf.float32, name=None)
    })
def define_tfrecord_signature(
    model,
    tfrecord_type: str,
    feature_config: FeatureConfig,
    preprocessing_keys_to_fns: dict,
    postprocessing_fn=None,
    required_fields_only: bool = True,
    pad_sequence: bool = False,
    max_sequence_size: int = 0,
):
    """
    Add signatures to the tf keras savedmodel

    Returns:
        Serving signature function that accepts a TFRecord string tensor
        and returns predictions
    """
    # TFRecord Signature
    # Define a parsing function for tfrecord protos
    inputs = feature_config.get_all_features(key="node_name", include_label=False)

    """
    NOTE:
    Setting pad_sequence=False for tfrecord signature as it is used at inference time
    and we do NOT want to score on padded records for performance reasons

    Limitation: This limits the serving signature to only run inference on a single query
    at a time given the current implementation. This is a tricky issue to fix because
    there is no real way to generate a dense tensor of ranking scores from different queries,
    as they might have varying number of records in each of them.

    Workaround: To infer on multiple queries, run predict() on each of the queries separately.
    """
    tfrecord_parse_fn = get_parse_fn(
        feature_config=feature_config,
        tfrecord_type=tfrecord_type,
        preprocessing_keys_to_fns=preprocessing_keys_to_fns,
        max_sequence_size=max_sequence_size,
        required_fields_only=required_fields_only,
        pad_sequence=pad_sequence,
    )

    dtype_map = dict()
    for feature_info in feature_config.get_all_features(include_label=False):
        feature_node_name = feature_info.get("node_name", feature_info["name"])
        dtype_map[feature_node_name] = feature_config.get_dtype(feature_info)

    # Define a serving signature for tfrecord
    @tf.function(input_signature=[TensorSpec(shape=[None], dtype=tf.string)])
    def _serve_tfrecord(protos):
        input_size = tf.shape(protos)[0]
        features_dict = {
            feature: TensorArray(dtype=dtype_map[feature], size=input_size)
            for feature in inputs
        }

        # Define loop index
        i = tf.constant(0)

        # Define loop condition
        def loop_condition(i, protos, features_dict):
            return tf.less(i, input_size)

        # Define loop body
        def loop_body(i, protos, features_dict):
            features, labels = tfrecord_parse_fn(protos[i])
            for feature, feature_val in features.items():
                features_dict[feature] = features_dict[feature].write(i, feature_val)
            i += 1
            return i, protos, features_dict

        # Parse all SequenceExample protos to get features
        _, _, features_dict = tf.while_loop(
            cond=loop_condition,
            body=loop_body,
            loop_vars=[i, protos, features_dict],
        )

        # Convert TensorArray to tensor
        features_dict = {k: v.stack() for k, v in features_dict.items()}

        # Run the model to get predictions
        predictions = model(inputs=features_dict)

        # Define a post hook
        if postprocessing_fn:
            predictions = postprocessing_fn(predictions, features_dict)

        return predictions

    return _serve_tfrecord
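# Hedged usage sketch: attaching the serving signature above at export time.
# `my_model`, `my_feature_config`, and EXPORT_DIR are hypothetical placeholders,
# and the signature key name "serving_tfrecord" is also an assumption.
serving_fn = define_tfrecord_signature(
    model=my_model,
    tfrecord_type="sequence_example",
    feature_config=my_feature_config,
    preprocessing_keys_to_fns={},
)
tf.saved_model.save(my_model, export_dir=EXPORT_DIR,
                    signatures={"serving_tfrecord": serving_fn})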
# save
keras2onnx.save_model(onnx_model, ONNX_MODEL_FILE)

################################################################################
# Save: Save the model to a frozen TensorFlow model.
#
from tensorflow import TensorSpec
from tensorflow.io import write_graph
from tensorflow.python.framework.convert_to_constants import convert_variables_to_constants_v2

print()
print(f"Saving a frozen TensorFlow model:")

# convert the Keras model to a concrete function
spec = TensorSpec(shape=model.inputs[0].shape, dtype=model.inputs[0].dtype)
full_model = tf.function(lambda x: model(x)).get_concrete_function(spec)

# freeze that concrete function
frozen_func = convert_variables_to_constants_v2(full_model)
graph_def = frozen_func.graph.as_graph_def()

# save the frozen graph to disk
write_graph(graph_or_graph_def=graph_def, logdir='.', name=TF_MODEL_FILE, as_text=False)

print(f"Frozen model input node: {frozen_func.inputs}")
print(f"Frozen model output node: {frozen_func.outputs}")
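# Hedged sketch: loading the frozen graph back for inference, using the common
# wrap_function pattern. The tensor names "x:0" and "Identity:0" are assumptions;
# the real names are in the frozen_func.inputs/outputs printed above.
loaded_graph_def = tf.compat.v1.GraphDef()
with open(TF_MODEL_FILE, 'rb') as f:
    loaded_graph_def.ParseFromString(f.read())

def _import_frozen_graph():
    tf.compat.v1.import_graph_def(loaded_graph_def, name="")

wrapped = tf.compat.v1.wrap_function(_import_frozen_graph, signature=[])
inference_fn = wrapped.prune(
    feeds=wrapped.graph.as_graph_element("x:0"),           # assumed input tensor name
    fetches=wrapped.graph.as_graph_element("Identity:0"))  # assumed output tensor name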
def _check_signature(spec: tf.TensorSpec, value):
    # Convert int/float to numpy arrays of dtype np.int64 and np.float64.
    value = np.asarray(value)
    self.assertTrue(spec.is_compatible_with(tf.convert_to_tensor(value)))
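# Hedged sketch of the enclosing test this helper presumably lives in (it
# references `self` from a test case); the class name and values are illustrative.
import numpy as np
import tensorflow as tf

class SpecCompatibilityTest(tf.test.TestCase):

    def test_check_signature(self):
        def _check_signature(spec: tf.TensorSpec, value):
            value = np.asarray(value)
            self.assertTrue(spec.is_compatible_with(tf.convert_to_tensor(value)))

        _check_signature(tf.TensorSpec(shape=(), dtype=tf.int64), 3)
        _check_signature(tf.TensorSpec(shape=(2,), dtype=tf.float64), [1.0, 2.0])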
def _get_subgraphed_train_ds_spec(neighbourhood_size):
    return _get_subgraphed_test_ds_spec(neighbourhood_size), {
        'stacked_scored_labels': TensorSpec(shape=(None, 3), dtype=tf.float32, name=None)
    }
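# Hedged usage sketch: loading a previously saved subgraphed dataset with the
# matching (features, labels) element spec; the path and neighbourhood size
# below are hypothetical.
train_ds = tf.data.experimental.load(
    'datasets/subgraphed/train_ds',
    _get_subgraphed_train_ds_spec(neighbourhood_size=5))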
PRIVATE_TEST_DS_PATH = os.path.join(DATASETS_DIR, 'private_test_ds')
SUBGRAPHED_DATASETS_DIR = os.path.join(DATASETS_DIR, 'subgraphed')
SUBMISSIONS_DIR = os.path.join(SCRIPT_DIR, 'submissions')
SAMPLE_SUBMISSION_PATH = os.path.join(SUBMISSIONS_DIR, 'sample_submission.csv')

FEATURE_NAMES = ['sequence', 'structure', 'predicted_loop_type',
                 'adjacency_matrix', 'edges_features_matrix', 'seq_scored']
ERROR_LABEL_NAMES = ['reactivity_error', 'deg_error_Mg_pH10', 'deg_error_pH10',
                     'deg_error_Mg_50C', 'deg_error_50C']
NORMAL_LABEL_NAMES = ['reactivity', 'deg_Mg_pH10', 'deg_pH10', 'deg_Mg_50C', 'deg_50C']
ALL_LABEL_NAMES = ERROR_LABEL_NAMES + NORMAL_LABEL_NAMES
SCORED_LABEL_NAMES = ['reactivity', 'deg_Mg_pH10', 'deg_Mg_50C']

SUBGRAPH_OPERATION_BATCH_SIZE = 64

TEST_DS_SPEC = ({
    'sequence': TensorSpec(shape=(None, 4), dtype=tf.float32, name=None),
    'structure': TensorSpec(shape=(None, 3), dtype=tf.float32, name=None),
    'predicted_loop_type': TensorSpec(shape=(None, 7), dtype=tf.float32, name=None),
    'adjacency_matrix': TensorSpec(shape=(None, None), dtype=tf.float32, name=None),
    'edges_features_matrix': TensorSpec(shape=(None, None, 3), dtype=tf.float32, name=None),
    'seq_scored': TensorSpec(shape=(), dtype=tf.float32, name=None),
    'stacked_base_features': TensorSpec(shape=(None, 14), dtype=tf.float32, name=None)
})
TRAIN_DS_SPEC = (TEST_DS_SPEC, {
    'stacked_scored_labels': TensorSpec(shape=(None, 3), dtype=tf.float32, name=None)
})


def _get_subgraphed_test_ds_spec(neighbourhood_size):
    return ({
        'sequence': TensorSpec(shape=(None, 4), dtype=tf.float32, name=None),
        'structure': TensorSpec(shape=(None, 3), dtype=tf.float32, name=None),
        'predicted_loop_type': TensorSpec(shape=(None, 7), dtype=tf.float32, name=None),
        'adjacency_matrix': TensorSpec(shape=(None, neighbourhood_size, neighbourhood_size),
                                       dtype=tf.float32, name=None),
        'edges_features_matrix': TensorSpec(shape=(None, neighbourhood_size, neighbourhood_size, 3),
                                            dtype=tf.float32, name=None),
        'seq_scored': TensorSpec(shape=(), dtype=tf.float32, name=None),
        'stacked_base_features': TensorSpec(shape=(None, neighbourhood_size, 14),
                                            dtype=tf.float32, name=None)
    })
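# Hedged sketch: a quick structural sanity check that a dataset loaded with
# TEST_DS_SPEC actually carries that spec; the load path comes from the
# constants above.
loaded_test_ds = tf.data.experimental.load(PRIVATE_TEST_DS_PATH, TEST_DS_SPEC)
assert loaded_test_ds.element_spec == TEST_DS_SPEC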
def _build_saved_model_signatures(self):
    """
    Add signatures to the tf keras savedmodel
    """
    # Default signature
    # TODO: Define input_signature
    # @tf.function(input_signature=[])
    # def _serve_default(**features):
    #     features_dict = {k: tf.cast(v, tf.float32) for k, v in features.items()}
    #     # Run the model to get predictions
    #     predictions = self.model(inputs=features_dict)
    #
    #     # Mask the padded records
    #     for key, value in predictions.items():
    #         predictions[key] = tf.where(
    #             tf.equal(features_dict['mask'], 0),
    #             tf.constant(-np.inf),
    #             predictions[key])
    #
    #     return predictions

    # TFRecord Signature
    # Define a parsing function for tfrecord protos
    inputs = self.feature_config.get_all_features(key="node_name", include_label=False)
    tfrecord_parse_fn = make_parse_fn(feature_config=self.feature_config,
                                      max_num_records=self.max_num_records)

    # Define a serving signature for tfrecord
    @tf.function(input_signature=[TensorSpec(shape=[None], dtype=tf.string)])
    def _serve_tfrecord(sequence_example_protos):
        input_size = tf.shape(sequence_example_protos)[0]

        features_dict = {
            feature: TensorArray(dtype=tf.float32, size=input_size)
            for feature in inputs
        }

        # Define loop index
        i = tf.constant(0)

        # Define loop condition
        def loop_condition(i, sequence_example_protos, features_dict):
            return tf.less(i, input_size)

        # Define loop body
        def loop_body(i, sequence_example_protos, features_dict):
            """
            TODO: Modify parse_fn from parse_single_sequence_example -> parse_sequence_example
                  to handle a batch of TFRecord proto
            """
            features, labels = tfrecord_parse_fn(sequence_example_protos[i])
            for feature, feature_val in features.items():
                features_dict[feature] = features_dict[feature].write(
                    i, tf.cast(feature_val, tf.float32))
            i += 1
            return i, sequence_example_protos, features_dict

        # Parse all SequenceExample protos to get features
        _, _, features_dict = tf.while_loop(
            cond=loop_condition,
            body=loop_body,
            loop_vars=[i, sequence_example_protos, features_dict],
        )

        # Convert TensorArray to tensor
        features_dict = {k: v.stack() for k, v in features_dict.items()}

        # Run the model to get predictions
        predictions = self.model(inputs=features_dict)

        # Mask the padded records
        for key, value in predictions.items():
            predictions[key] = tf.where(tf.equal(features_dict["mask"], 0),
                                        tf.constant(0.0),
                                        predictions[key])

        return predictions

    return {
        # ServingSignatureKey.DEFAULT: _serve_default,
        ServingSignatureKey.TFRECORD: _serve_tfrecord
    }
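# Hedged usage sketch: feeding the signature dict returned above into
# tf.saved_model.save. `relevance_model` and EXPORT_DIR are hypothetical, and
# ServingSignatureKey.TFRECORD is assumed to resolve to a plain string key.
signatures = relevance_model._build_saved_model_signatures()
tf.saved_model.save(relevance_model.model, EXPORT_DIR, signatures=signatures)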
def _check_signature(spec: tf.TensorSpec, value: np.ndarray):
    self.assertTrue(spec.is_compatible_with(tf.convert_to_tensor(value)))
def learn(self):
    pass

def get_epsilon(self, frame_number):
    if frame_number < self.replay_buffer_start_size:
        return self.eps_initial
    elif self.replay_buffer_start_size <= frame_number < self.replay_buffer_start_size + self.eps_annealing_frames:
        return self.slope * frame_number + self.intercept
    elif frame_number >= self.replay_buffer_start_size + self.eps_annealing_frames:
        return self.slope_2 * frame_number + self.intercept_2


if __name__ == "__main__":
    data_spec = (TensorSpec((84, 84), dtype=tf.uint8, name='state_t'),
                 TensorSpec((), dtype=tf.uint8, name='action'),
                 TensorSpec((), dtype=tf.float32, name='reward'),
                 TensorSpec((84, 84), dtype=tf.uint8, name='state_t1'),
                 TensorSpec((), dtype=tf.bool, name='terminal_flag'))

    capacity = 10000
    batch_size = 32

    MAX_EPOCHS = 100
    MAX_EPOCH_FRAME = 10000
    MAX_EPISODE_LENGTH = 1000

    ENV_NAME = "hello world"
    env = GameEnvironment(ENV_NAME)
    agent = Agent()
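# Hedged sketch: validating one synthetic transition against the data_spec
# defined above before storing it in a replay buffer; the values are
# illustrative only (spec names do not affect compatibility checks).
transition = (tf.zeros((84, 84), tf.uint8),   # state_t
              tf.constant(0, tf.uint8),       # action
              tf.constant(0.0, tf.float32),   # reward
              tf.zeros((84, 84), tf.uint8),   # state_t1
              tf.constant(False))             # terminal_flag
assert all(spec.is_compatible_with(value)
           for spec, value in zip(data_spec, transition))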
embedding_feature_groups_specs = (tuple(
    TensorSpec(shape=(None, emb_feature_group_dim), dtype=tf.float32, name=None)
    for f in range(emb_feature_groups_dim)))
print(embedding_feature_groups_specs)

combined_feature_mappings = {
    **feature_mappings,
    **embedding_feature_mappings
}
print(combined_feature_mappings)

for t in embedding_feature_groups_specs:
    feature_groups_specs += (t, )
print('combined: ' + str(feature_groups_specs))

tensor_spec = (
    TensorSpec(shape=(), dtype=tf.int64, name=None),  # enum for sharding, to be removed after loading the data
    # Keras Input: (inputs, targets, sample_weights)
    (
        feature_groups_specs,  # Feature groups, inputs
        TensorSpec(shape=(None, 1), dtype=tf.float32, name=None),    # labels, targets
        TensorSpec(shape=(None, 1), dtype=tf.float32, name=None)))   # weights, sample_weights
print(tensor_spec)

feature_groups_types = (tuple(tf.float32 for f in range(feature_groups_dim + emb_feature_groups_dim)))
print(feature_groups_types)

numbers = range(feature_group_dim)
def Test_distribute_datasets():
    BATCH_SIZE_PER_REPLICA = 5
    strategy = tf.distribute.MirroredStrategy()

    # download grader_dataset from https://drive.google.com/file/d/19R6PumfmXkRtm8Pmm8tbXfQ3O80CV_hw/view?usp=sharing
    grader_dataset = tf.data.experimental.load(
        "grader_dataset",
        (TensorSpec(shape=(None, None, 3), dtype=tf.uint8, name=None),
         TensorSpec(shape=(), dtype=tf.int64, name=None)))

    train_examples = grader_dataset.take(80)
    validation_examples = grader_dataset.skip(80).take(10)
    test_examples = grader_dataset.skip(90)

    def format_image(image, label):
        image = tf.image.resize(image, (224, 224)) / 255.0
        return image, label

    train_batches = train_examples.shuffle(
        80 // 4).map(format_image).batch(BATCH_SIZE_PER_REPLICA).prefetch(1)
    validation_batches = validation_examples.map(format_image).batch(
        BATCH_SIZE_PER_REPLICA).prefetch(1)
    test_batches = test_examples.map(format_image).batch(1)

    train_dist_dataset, val_dist_dataset, test_dist_dataset = learner_mod.distribute_datasets(
        strategy, train_batches, validation_batches, test_batches)

    if type(train_dist_dataset) != DistributedDataset:
        failed_cases = [{
            "name": "train_dist_dataset_type_check",
            "expected": DistributedDataset,
            "got": type(train_dist_dataset)
        }]
        return failed_cases, 1
    elif type(val_dist_dataset) != DistributedDataset:
        failed_cases = [{
            "name": "val_dist_dataset_type_check",
            "expected": DistributedDataset,
            "got": type(val_dist_dataset)
        }]
        return failed_cases, 1
    elif type(test_dist_dataset) != DistributedDataset:
        failed_cases = [{
            "name": "test_dist_dataset_type_check",
            "expected": DistributedDataset,
            "got": type(test_dist_dataset)
        }]
        return failed_cases, 1
    else:
        test_cases = [
            {
                "name": "train_dist_dataset_len_check",
                "got": len(list(train_dist_dataset)),
                "expected": len(train_batches)
            },
            {
                "name": "val_dist_dataset_len_check",
                "got": len(list(val_dist_dataset)),
                "expected": len(validation_batches)
            },
            {
                "name": "test_dist_dataset_len_check",
                "got": len(list(test_dist_dataset)),
                "expected": len(test_batches)
            },
        ]

        failed_cases = get_failed_cases(test_cases)
        return failed_cases, len(test_cases)
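# Hedged sketch of what learner_mod.distribute_datasets is expected to do
# internally, based on what the grader above checks: wrapping each batched
# dataset with strategy.experimental_distribute_dataset. This is an assumption
# about the learner's implementation, not the graded solution itself.
def distribute_datasets_sketch(strategy, train_batches, validation_batches, test_batches):
    train_dist = strategy.experimental_distribute_dataset(train_batches)
    val_dist = strategy.experimental_distribute_dataset(validation_batches)
    test_dist = strategy.experimental_distribute_dataset(test_batches)
    return train_dist, val_dist, test_dist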
def Test_distributed_train_test_step_fns():
    strategy = tf.distribute.MirroredStrategy()
    BATCH_SIZE_PER_REPLICA = 5
    GLOBAL_BATCH_SIZE = BATCH_SIZE_PER_REPLICA * strategy.num_replicas_in_sync

    grader_dataset = tf.data.experimental.load(
        "grader_dataset",
        (TensorSpec(shape=(None, None, 3), dtype=tf.uint8, name=None),
         TensorSpec(shape=(), dtype=tf.int64, name=None)))

    train_examples = grader_dataset.take(80)
    validation_examples = grader_dataset.skip(80).take(10)
    test_examples = grader_dataset.skip(90)

    def format_image(image, label):
        image = tf.image.resize(image, (224, 224)) / 255.0
        return image, label

    train_batches = train_examples.shuffle(80 // 4).map(format_image).batch(5).prefetch(1)
    validation_batches = validation_examples.map(format_image).batch(5).prefetch(1)
    test_batches = test_examples.map(format_image).batch(1)

    train_dist_dataset, validation_dist_dataset, test_dist_dataset = learner_mod.distribute_datasets(
        strategy, train_batches, validation_batches, test_batches)

    MODULE_HANDLE = 'data/resnet_50_feature_vector'

    class ResNetModel(tf.keras.Model):
        def __init__(self, classes):
            super(ResNetModel, self).__init__()
            self._feature_extractor = hub.KerasLayer(MODULE_HANDLE, trainable=False)
            self._classifier = tf.keras.layers.Dense(classes, activation='softmax')

        def call(self, inputs):
            x = self._feature_extractor(inputs)
            x = self._classifier(x)
            return x

    with strategy.scope():
        model = ResNetModel(classes=102)
        optimizer = tf.keras.optimizers.Adam()
        loss_object = tf.keras.losses.SparseCategoricalCrossentropy(
            reduction=tf.keras.losses.Reduction.NONE)

        def compute_loss(labels, predictions):
            per_example_loss = loss_object(labels, predictions)
            return tf.nn.compute_average_loss(
                per_example_loss, global_batch_size=GLOBAL_BATCH_SIZE)

        test_loss = tf.keras.metrics.Mean(name='test_loss')
        train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
        test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')

    train_step, test_step = solution_mod.train_test_step_fns(
        strategy, model, compute_loss, optimizer, train_accuracy, loss_object,
        test_loss, test_accuracy)

    distributed_train_step, distributed_test_step = learner_mod.distributed_train_test_step_fns(
        strategy, train_step, test_step, model, compute_loss, optimizer,
        train_accuracy, loss_object, test_loss, test_accuracy)

    if not callable(distributed_train_step):
        failed_cases = [{
            "name": "distributed_train_step_callable_check",
            "expected": True,
            "got": False
        }]
        return failed_cases, 1
    elif not callable(distributed_test_step):
        failed_cases = [{
            "name": "distributed_test_step_callable_check",
            "expected": True,
            "got": False
        }]
        return failed_cases, 1

    train_result = distributed_train_step(list(train_dist_dataset)[0])
    distributed_test_step(list(test_dist_dataset)[0])
    test_loss_result1 = test_loss.result()
    distributed_train_step(list(train_dist_dataset)[0])

    test_cases = [
        {
            "name": "train_result_type_check",
            "got": type(train_result),
            "expected": EagerTensor
        },
        {
            "name": "train_result_shape_check",
            "got": train_result.shape,
            "expected": ()
        },
        {
            "name": "train_result_dtype_check",
            "got": train_result.dtype,
            "expected": tf.float32
        },
        {
            "name": "test_loss_result_type_check",
            "got": type(test_loss_result1),
            "expected": EagerTensor
        },
        {
            "name": "test_loss_greater_than_zero",
            "got": test_loss_result1.numpy() > 0,
            "expected": True
        },
    ]

    failed_cases = get_failed_cases(test_cases)
    return failed_cases, len(test_cases)
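# Hedged sketch: a full epoch loop built from the distributed step functions
# exercised above. EPOCHS is a hypothetical constant, and the distributed
# datasets, step functions, and metrics are assumed to be in scope.
EPOCHS = 3
for epoch in range(EPOCHS):
    total_loss, num_batches = 0.0, 0
    for batch in train_dist_dataset:
        total_loss += distributed_train_step(batch)
        num_batches += 1
    for batch in test_dist_dataset:
        distributed_test_step(batch)
    print(f"epoch {epoch}: train_loss={total_loss / num_batches:.4f}, "
          f"test_loss={test_loss.result():.4f}, "
          f"test_acc={test_accuracy.result():.4f}")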
def define_tfrecord_signature(
    model,
    tfrecord_type: str,
    feature_config: FeatureConfig,
    preprocessing_keys_to_fns: dict,
    postprocessing_fn=None,
    required_fields_only: bool = True,
    pad_sequence: bool = False,
    max_sequence_size: int = 0,
):
    """
    Serving signature that wraps around the keras model trained as a RelevanceModel
    with a pre-step to parse TFRecords and apply additional feature preprocessing

    Parameters
    ----------
    model : keras Model
        Keras model object to be saved
    tfrecord_type : {"example", "sequence_example"}
        Type of the TFRecord protobuf that the saved model will be used on at serving time
    feature_config : `FeatureConfig` object
        FeatureConfig object that defines the input features into the model
        and the corresponding feature preprocessing functions to be used in the serving signature
    preprocessing_keys_to_fns : dict
        Dictionary mapping function names to tf.functions that should be saved
        in the preprocessing step of the tfrecord serving signature
    postprocessing_fn : function
        Custom tensorflow compatible postprocessing function to be used at serving time.
        Saved as part of the postprocessing layer of the tfrecord serving signature
    required_fields_only : bool
        Boolean value defining if only required fields need to be added
        to the tfrecord parsing function at serving time
    pad_sequence : bool, optional
        Value defining if sequences should be padded for SequenceExample proto inputs
        at serving time. Set this to False if you do not want to score padded records.
    max_sequence_size : int, optional
        Maximum sequence size for SequenceExample protobuf
        The protobuf object will be padded or clipped to this value

    Returns
    -------
    `tf.function`
        Serving signature function that accepts a TFRecord string tensor
        and returns predictions
    """
    # TFRecord Signature
    # Define a parsing function for tfrecord protos
    inputs = feature_config.get_all_features(key="node_name", include_label=False)

    """
    NOTE:
    Setting pad_sequence=False for tfrecord signature as it is used at inference time
    and we do NOT want to score on padded records for performance reasons

    Limitation: This limits the serving signature to only run inference on a single query
    at a time given the current implementation. This is a tricky issue to fix because
    there is no real way to generate a dense tensor of ranking scores from different queries,
    as they might have varying number of records in each of them.

    Workaround: To infer on multiple queries, run predict() on each of the queries separately.
    """
    tfrecord_parse_fn = get_parse_fn(
        feature_config=feature_config,
        tfrecord_type=tfrecord_type,
        preprocessing_keys_to_fns=preprocessing_keys_to_fns,
        max_sequence_size=max_sequence_size,
        required_fields_only=required_fields_only,
        pad_sequence=pad_sequence,
    )

    dtype_map = dict()
    for feature_info in feature_config.get_all_features(include_label=False):
        feature_node_name = feature_info.get("node_name", feature_info["name"])
        dtype_map[feature_node_name] = feature_config.get_dtype(feature_info)

    # Define a serving signature for tfrecord
    @tf.function(input_signature=[TensorSpec(shape=[None], dtype=tf.string)])
    def _serve_tfrecord(protos):
        input_size = tf.shape(protos)[0]
        features_dict = {
            feature: TensorArray(dtype=dtype_map[feature], size=input_size)
            for feature in inputs
        }

        # Define loop index
        i = tf.constant(0)

        # Define loop condition
        def loop_condition(i, protos, features_dict):
            return tf.less(i, input_size)

        # Define loop body
        def loop_body(i, protos, features_dict):
            features, labels = tfrecord_parse_fn(protos[i])
            for feature, feature_val in features.items():
                features_dict[feature] = features_dict[feature].write(i, feature_val)
            i += 1
            return i, protos, features_dict

        # Parse all SequenceExample protos to get features
        _, _, features_dict = tf.while_loop(
            cond=loop_condition,
            body=loop_body,
            loop_vars=[i, protos, features_dict],
        )

        # Convert TensorArray to tensor
        features_dict = {k: v.stack() for k, v in features_dict.items()}

        # Run the model to get predictions
        predictions = model(inputs=features_dict)

        # Define a post hook
        if postprocessing_fn:
            predictions = postprocessing_fn(predictions, features_dict)

        return predictions

    return _serve_tfrecord