def _encode_numerical_feature(
    feature: KerasTensor,
    name: str,
    dataset: Optional[BatchDataset],
) -> KerasTensor:
    """Normalize a numerical feature.

    Args:
        feature: The input layer of the feature.
        name: The feature's name (its column name in the original dataframe).
        dataset: The training data, yielding ``(features, label)`` pairs where
            ``features`` can be indexed by ``name``. If ``None``, a generic
            placeholder tensor is returned instead of a normalized one.

    Returns:
        The normalized tensor of the input feature, or a ``(None, 1)``
        float32 ``KerasTensor`` placeholder when ``dataset`` is ``None``.
    """
    # Return a generic tensor for the tuner initialization. The check is an
    # explicit identity test: "no dataset given" must not depend on how the
    # dataset object happens to define truthiness.
    if dataset is None:
        return KerasTensor(
            type_spec=TensorSpec(shape=(None, 1), dtype=tf.float32, name=None))

    # Create a Normalization layer for our feature
    normalizer = Normalization()

    # Prepare a Dataset that only yields our feature, with a trailing axis
    feature_ds = dataset.map(lambda x, y: x[name])
    feature_ds = feature_ds.map(lambda x: tf.expand_dims(x, -1))

    # Learn the statistics of the data
    normalizer.adapt(feature_ds)

    # Normalize the input feature
    return normalizer(feature)
def min_normalizer():
    """Return a Normalization layer adapted on a tiny 2x2 float32 sample."""
    layer = Normalization()
    sample = np.array([[1., 2.], [2., 3.]], dtype=np.float32)
    layer.adapt(sample)
    return layer
def encode_numerical_feature(feature, name, dataset):
    """Normalize `feature` with statistics learned from column `name`.

    `dataset` is mapped as ``(features, label)`` pairs; only ``features[name]``
    is kept, and a trailing axis is added before the layer is adapted.
    """
    def _select_column(features, _label):
        return features[name]

    def _add_trailing_axis(values):
        return tf.expand_dims(values, -1)

    scaler = Normalization()
    column_ds = dataset.map(_select_column).map(_add_trailing_axis)
    scaler.adapt(column_ds)
    return scaler(feature)
def encode_numerical_feature(feature, name, dataset):
    """Return `feature` normalised by a layer adapted on `dataset[name]`."""
    # Keras Normalization layer for the input feature passed as argument
    norm_layer = Normalization()

    # Dataset containing only the requested feature, one value per row
    single_feature = dataset.map(lambda x, y: x[name])
    single_feature = single_feature.map(
        lambda values: tf.expand_dims(values, -1))

    # Learn the statistics of the data, then normalise the input feature
    norm_layer.adapt(single_feature)
    return norm_layer(feature)
def load_data():
    """Load the Titanic training CSV and return preprocessed train/test splits.

    Numeric columns are concatenated and passed through a `Normalization`
    layer adapted on the data; the remaining (string) columns are each run
    through a `StringLookup` followed by a `CategoryEncoding`. The resulting
    preprocessing model is plotted and then applied to the whole dataframe
    before an 80/20 split.

    Returns:
        A tuple ``(train_features, train_labels, test_features, test_labels)``.
    """
    from sklearn.model_selection import train_test_split

    data = pd.read_csv(
        "https://storage.googleapis.com/tf-datasets/titanic/train.csv")
    labels = data.pop('survived')

    # One symbolic Keras input per CSV column: `string` for object columns,
    # `float32` for everything else.
    inputs = {}
    for name, column in data.items():
        dtype = string if column.dtype == object else float32
        inputs[name] = Input(shape=(1, ), name=name, dtype=dtype)

    # Extracting and normalizing numeric features
    numeric_inputs = {
        name: feature
        for name, feature in inputs.items() if feature.dtype == float32
    }
    x = Concatenate()(list(numeric_inputs.values()))
    norm = Normalization()
    # Select the columns with an explicit list rather than a dict view.
    norm.adapt(np.array(data[list(numeric_inputs)]))
    processed = [norm(x)]

    # Extracting and encoding non-numeric features
    for name, feature in inputs.items():
        if feature.dtype == float32:
            continue
        word = StringLookup(vocabulary=np.unique(data[name]))
        one_hot = CategoryEncoding(max_tokens=word.vocab_size())
        processed.append(one_hot(word(feature)))

    preprocessing_model = Model(inputs, Concatenate()(processed))
    utils.plot_model(model=preprocessing_model,
                     rankdir='LR',
                     dpi=72,
                     show_shapes=True)

    feature_dict = {name: np.array(value) for name, value in data.items()}
    train_features, test_features, train_labels, test_labels = train_test_split(
        preprocessing_model(feature_dict).numpy(), labels, test_size=0.2)
    return train_features, train_labels, test_features, test_labels
def encodeMyFeature(indidualFeature, name, dataset):
    """Normalize one feature using statistics gathered from `dataset`.

    The dataset is mapped as ``(features, label)`` pairs and reduced to the
    single column `name` before the normalization layer is adapted on it.
    """
    # Pull out a dataset holding just this feature, with a trailing axis.
    columnOnly = dataset.map(lambda x, y: x[name])
    columnOnly = columnOnly.map(lambda values: tf.expand_dims(values, -1))

    # Let the layer compute the statistics of the data, then apply it.
    scaler = Normalization()
    scaler.adapt(columnOnly)
    return scaler(indidualFeature)
def encode_numerical_feature(feature, name, dataset):
    """Apply a Normalization layer, adapted on column `name`, to `feature`."""
    # Dataset that only yields our feature, expanded with a trailing axis
    feature_only = dataset.map(lambda x, y: x[name]).map(
        lambda column: tf.expand_dims(column, -1))

    # Normalization layer that learns the statistics of that column
    layer = Normalization()
    layer.adapt(feature_only)

    # Normalized version of the input feature
    result = layer(feature)
    return result
def __init__(self, filename, m_input='u', m_output='y'):
    """Load `filename` with `loadmat` and store raw and normalized data.

    Args:
        filename: Path handed to `loadmat`.
        m_input: Key of the input array in the loaded data.
        m_output: Key of the output array in the loaded data.
    """
    contents = loadmat(filename)

    # Input: keep the raw array, a layer adapted on it, and the normalized
    # result.
    self.input = contents[m_input]
    input_layer = Normalization()
    input_layer.adapt(self.input)
    self.layernorminput = input_layer
    self.input_norm = input_layer(self.input)

    # Output: same treatment with its own, independently adapted layer.
    self.output = contents[m_output]
    output_layer = Normalization()
    output_layer.adapt(self.output)
    self.layernormoutput = output_layer
    self.output_norm = output_layer(self.output)
def discard_model(input_shape):
    """
    Discard Model
    Network structure using idea of CNN

    The network is: Normalization -> 3 x Conv2D(256, (3, 1)) ->
    5 x residual_block(256) -> 1x1 Conv2D -> Flatten -> Dense(34, softmax).
    The model summary is printed and the model is compiled before returning.

    :param input_shape: data shape
    :return: keras model class
    """
    k_input = keras.Input(input_shape)
    # NOTE(review): this Normalization layer is never adapted in this
    # function - confirm that is intended.
    x = Normalization()(k_input)

    for _ in range(3):
        x = Conv2D(256, (3, 1), padding="same",
                   data_format="channels_last")(x)

    for _ in range(5):
        x = residual_block(x, 256, _project_shortcut=True)

    x = Conv2D(kernel_size=1, strides=1, filters=1, padding="same")(x)
    x = Flatten()(x)
    outputs = Dense(34, activation="softmax")(x)

    model = Model(k_input, outputs)
    model.summary()
    # `metrics` is documented as a list; passing a bare metric object is not
    # accepted by every Keras version.
    model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.008),
                  loss=keras.losses.CategoricalCrossentropy(),
                  metrics=[keras.metrics.CategoricalAccuracy()])
    return model
def hypertune(hp):
    """Build and compile a CNN whose architecture is driven by `hp`.

    `hp` must expose `Int` and `Choice`; it selects the number of
    convolution layers and their filters, the number of residual-block
    iterations and their filters, and the optimizer.

    :param hp: hyperparameter container queried while building the model
    :return: compiled keras model
    """
    inputs = keras.Input((16, 34, 1))
    x = Normalization()(inputs)

    for i in range(hp.Int('num_conv_layer', 1, 5, default=3)):
        x = Conv2D(hp.Int('filters_' + str(i), 32, 512, step=32, default=256),
                   (3, 1),
                   padding="same",
                   data_format="channels_last")(x)

    for i in range(
            hp.Choice('num_res_block', [5, 10, 20, 30, 40, 50], default=5)):
        # Both residual blocks of an iteration use the same hyperparameter
        # name, so look the value up once.
        block_filters = hp.Choice('filters_res_block' + str(i),
                                  [64, 128, 256, 512],
                                  default=256)
        x = residual_block(x, block_filters, _project_shortcut=True)
        x = residual_block(x, block_filters, _project_shortcut=True)

    x = Conv2D(kernel_size=1, strides=1, filters=1, padding="same")(x)
    x = Flatten()(x)
    outputs = Dense(34, activation="softmax")(x)

    model = Model(inputs, outputs)
    model.summary()
    # `metrics` is documented as a list; passing a bare metric object is not
    # accepted by every Keras version.
    model.compile(optimizer=hp.Choice('optimizer', ['adam', 'sgd', 'Nadam']),
                  loss=keras.losses.CategoricalCrossentropy(),
                  metrics=[keras.metrics.CategoricalAccuracy()])
    return model
def __init__(
    self,
    pretrain_dataset: str = None,
    pooling: str = "max",
    task: str = "orig_labels",
):
    """A ResNet50 model that can be pretrained or trained from scratch,
    adapting to each relevant variation in this project. This is the model
    class version.

    WARNING: Although this seems more clean, it doesn't work well with
    Weights and Biases' callback. Please use func_resnet instead.

    Args:
        pretrain_dataset (str, optional): The dataset in which the model is
            pretrained. If left unspecified, the model starts with random
            weights. Available options are "imagenet" and "bigearthnet".
            Defaults to None.
        pooling (str, optional): The type of global pooling to perform after
            the ResNet layers. Available options are "max" and "avg". It is
            only forwarded to `ResNet50`, i.e. it is not used when
            `pretrain_dataset` is "bigearthnet". Defaults to "max".
        task (str, optional): The task on which the model will be trained or
            fine-tuned on. Available options are "orig_labels" (original
            labels from the Kaggle challenge) and "deforestation". Defaults
            to "orig_labels".

    Raises:
        ValueError: If `task` is neither "orig_labels" nor "deforestation".
    """
    super(ResNet, self).__init__()
    self.pretrain_dataset = pretrain_dataset
    self.pooling = pooling
    self.task = task

    # Number of sigmoid outputs depends on the task.
    if self.task == "orig_labels":
        self.n_outputs = 17
    elif self.task == "deforestation":
        self.n_outputs = 1
    else:
        # ValueError is a subclass of Exception, so callers catching the
        # previous bare Exception keep working.
        raise ValueError(
            f'ERROR: Unrecognized task "{task}". Please select one of "orig_labels" or "deforestation".'
        )

    if pretrain_dataset == "bigearthnet":
        self.core = hub.KerasLayer(
            "https://tfhub.dev/google/remote_sensing/bigearthnet-resnet50/1"
        )
        # TensorFlow Hub modules require data in a [0, 1] range
        # stats estimated from subset of data in `02_eda_amazon_planet` notebook
        # NOTE(review): Normalization with mean/variance standardizes the
        # data rather than scaling it into [0, 1] - confirm this is what the
        # hub module expects.
        self.preprocess_input = Normalization(
            mean=[79.67114306, 87.08461826, 76.46177919],
            variance=[1857.54070494, 1382.94249315, 1266.69265399],
        )
    else:
        self.core = ResNet50(
            include_top=False,
            weights=pretrain_dataset,
            pooling=self.pooling,
        )
        # Using TensorFlow's ResNet-specific preprocessing
        self.preprocess_input = preprocess_input

    self.classifier = layers.Dense(self.n_outputs, activation="sigmoid")
def rcpk_model(input_shape):
    """
    Riichi, Chi, Pon, Kan models
    Network structure using idea of CNN

    The network is: Normalization -> 3 x Conv2D(256, (3, 1)) ->
    5 x residual_block(256) -> 3 x Conv2D(32, (3, 1)) -> Flatten ->
    Dense(1024) -> Dense(256) -> Dense(2, softmax). The model summary is
    printed and the model is compiled before returning.

    :param input_shape: data shape
    :return: keras model class
    """
    k_input = keras.Input(input_shape)
    x = Normalization()(k_input)

    for _ in range(3):
        x = Conv2D(256, (3, 1), padding="same",
                   data_format="channels_last")(x)

    for _ in range(5):
        x = residual_block(x, 256, _project_shortcut=True)

    for _ in range(3):
        x = Conv2D(32, (3, 1), padding="same",
                   data_format="channels_last")(x)

    x = Flatten()(x)
    x = Dense(1024)(x)
    x = Dense(256)(x)
    outputs = Dense(2, activation="softmax")(x)

    model = Model(k_input, outputs)
    model.summary()
    # `keras.metrics.Accuracy` counts exact equality between predictions and
    # labels, which is meaningless for float softmax outputs. BinaryAccuracy
    # thresholds the predictions first; it keeps the "accuracy" name so log
    # keys stay the same.
    model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.008),
                  loss=keras.losses.BinaryCrossentropy(),
                  metrics=[keras.metrics.BinaryAccuracy(name="accuracy")])
    return model
def func_resnet(
    pretrain_dataset: str = None,
    pooling: str = "max",
    task: str = "orig_labels",
):
    """A ResNet50 model that can be pretrained or trained from scratch,
    adapting to each relevant variation in this project. This is the
    functional API version. Works well with Weights and Biases' callback.

    Args:
        pretrain_dataset (str, optional): The dataset in which the model is
            pretrained. If left unspecified, the model starts with random
            weights. Available options are "imagenet" and "bigearthnet".
            Defaults to None.
        pooling (str, optional): The type of global pooling to perform after
            the ResNet layers. Available options are "max" and "avg". It is
            only forwarded to `ResNet50`, i.e. it is not used when
            `pretrain_dataset` is "bigearthnet". Defaults to "max".
        task (str, optional): The task on which the model will be trained or
            fine-tuned on. Available options are "orig_labels" (original
            labels from the Kaggle challenge) and "deforestation". Defaults
            to "orig_labels".

    Returns:
        A `tf.keras.Model` mapping a (256, 256, 3) input to `n_outputs`
        sigmoid activations (17 for "orig_labels", 1 for "deforestation").

    Raises:
        ValueError: If `task` is neither "orig_labels" nor "deforestation".
    """
    # Validate the task before building any part of the graph.
    if task == "orig_labels":
        n_outputs = 17
    elif task == "deforestation":
        n_outputs = 1
    else:
        # ValueError is a subclass of Exception, so callers catching the
        # previous bare Exception keep working.
        raise ValueError(
            f'ERROR: Unrecognized task "{task}". Please select one of "orig_labels" or "deforestation".'
        )

    inputs = layers.Input(shape=(256, 256, 3))

    if pretrain_dataset == "bigearthnet":
        # TensorFlow Hub modules require data in a [0, 1] range
        # stats estimated from subset of data in `02_eda_amazon_planet` notebook
        # NOTE(review): Normalization with mean/variance standardizes the
        # data rather than scaling it into [0, 1] - confirm this is what the
        # hub module expects.
        x = Normalization(
            mean=[79.67114306, 87.08461826, 76.46177919],
            variance=[1857.54070494, 1382.94249315, 1266.69265399],
        )(inputs)
        x = data_augmentation(x)
        x = hub.KerasLayer(
            "https://tfhub.dev/google/remote_sensing/bigearthnet-resnet50/1")(
                x)
    else:
        # Using TensorFlow's ResNet-specific preprocessing. It must be fed
        # the model inputs: `x` does not exist yet in this branch.
        x = preprocess_input(inputs)
        x = data_augmentation(x)
        x = ResNet50(
            include_top=False,
            weights=pretrain_dataset,
            pooling=pooling,
        )(x)

    outputs = layers.Dense(n_outputs, activation="sigmoid")(x)
    model = tf.keras.Model(inputs=inputs, outputs=outputs)
    return model
def train(data):
    """Convert `data` to arrays, normalize the inputs and print their stats.

    The distinct ``"type"`` values found in ``data["files"]`` are passed to
    `convert_data_to_np`; the returned inputs are normalized along the last
    axis and the variance and mean of the result are printed.
    """
    # Distinct file types present in the data
    types = {entry["type"] for entry in data["files"]}

    # Try to get >100 of each type
    training_input, training_output = convert_data_to_np(data, list(types))

    scaler = Normalization(axis=-1)
    scaler.adapt(training_input)
    normalized_data = scaler(training_input)

    variance = np.var(normalized_data)
    average = np.mean(normalized_data)
    print("var: %.4f" % variance)
    print("mean: %.4f" % average)

    # NOTE(review): `dense` and `training_output` are not used within this
    # function as visible here.
    dense = keras.layers.Dense(units=16)
def __init__(self,
             test_size: float = 0.2,
             validation_size: float = 0.33) -> None:
    """Load California housing data, split it and build tf.data pipelines.

    Args:
        test_size: Forwarded as `test_size` to the first `train_test_split`,
            which holds out the test set.
        validation_size: Forwarded as `test_size` to the second
            `train_test_split`, which carves the validation set out of the
            remaining training data.
    """
    # User-defined constants
    self.num_targets = 1
    self.batch_size = 128

    # Load the data set
    housing = fetch_california_housing()
    self.x = housing.data
    self.y = housing.target
    self.feature_names = housing.feature_names
    self.description = housing.DESCR

    # Split the dataset: test first, then validation from what is left
    x_rest, x_test, y_rest, y_test = train_test_split(self.x,
                                                      self.y,
                                                      test_size=test_size)
    x_train, x_val, y_train, y_val = train_test_split(
        x_rest, y_rest, test_size=validation_size)

    # Preprocess x data
    self.x_train = x_train.astype(np.float32)
    self.x_test = x_test.astype(np.float32)
    self.x_val = x_val.astype(np.float32)

    # Preprocess y data: one column per target
    target_shape = (-1, self.num_targets)
    self.y_train = np.reshape(y_train, target_shape).astype(np.float32)
    self.y_test = np.reshape(y_test, target_shape).astype(np.float32)
    self.y_val = np.reshape(y_val, target_shape).astype(np.float32)

    # Dataset attributes (note: `self.test_size` is the number of test rows,
    # not the `test_size` argument)
    self.train_size = self.x_train.shape[0]
    self.test_size = self.x_test.shape[0]
    self.num_features = self.x_train.shape[1]
    self.num_targets = self.y_train.shape[1]

    # Normalization layer, adapted on the training inputs only
    self.normalization_layer = Normalization()
    self.normalization_layer.adapt(self.x_train)

    # tf.data Datasets
    from_slices = tf.data.Dataset.from_tensor_slices
    self.train_dataset = from_slices((self.x_train, self.y_train))
    self.test_dataset = from_slices((self.x_test, self.y_test))
    self.val_dataset = from_slices((self.x_val, self.y_val))

    # Dataset preparation; only the training set is shuffled
    self.train_dataset = self._prepare_dataset(self.train_dataset,
                                               shuffle=True)
    self.test_dataset = self._prepare_dataset(self.test_dataset)
    self.val_dataset = self._prepare_dataset(self.val_dataset)
def cifar_standardization(x, mode='FEATURE_NORMALIZE', data_samples=None):
    """Apply one of two standardization schemes to `x`.

    Args:
        x: The tensor to standardize.
        mode: Case-insensitive. ``'FEATURE_NORMALIZE'`` applies a `Rescaling`
            layer computing ``(x - CIFAR_MEAN) / CIFAR_STD``.
            ``'PIXEL_MEAN_SUBTRACT'`` adapts a `Normalization` layer over
            axes ``[1, 2, 3]`` on `data_samples`, forces its variance weights
            to one so that only the mean is subtracted, and then rescales the
            result by ``1 / 255``.
        data_samples: Data used to adapt the mean-subtraction layer. Required
            when `mode` is ``'PIXEL_MEAN_SUBTRACT'``.

    Returns:
        The standardized tensor.

    Raises:
        ValueError: If `mode` is ``'PIXEL_MEAN_SUBTRACT'`` and `data_samples`
            is ``None``.
    """
    mode = mode.upper()
    assert mode in ['FEATURE_NORMALIZE', 'PIXEL_MEAN_SUBTRACT']

    # Test for None explicitly: `not data_samples` raises an ambiguous-truth
    # error for multi-element arrays, which is the expected input here.
    if mode == 'PIXEL_MEAN_SUBTRACT' and data_samples is None:
        raise ValueError('`data_samples` argument should not be `None`, '
                         'when `mode="PIXEL_MEAN_SUBTRACT"`.')

    if mode == 'FEATURE_NORMALIZE':
        cifar_mean = tf.cast(CIFAR_MEAN, tf.float32).numpy()
        cifar_std = tf.cast(CIFAR_STD, tf.float32).numpy()
        # scale * x + offset == (x - mean) / std
        x = Rescaling(scale=1. / cifar_std,
                      offset=-(cifar_mean / cifar_std),
                      name='mean_normalization')(x)
    elif mode == 'PIXEL_MEAN_SUBTRACT':
        mean_subtraction_layer = Normalization(axis=[1, 2, 3],
                                               name='pixel_mean_subtraction')
        mean_subtraction_layer.adapt(data_samples)

        # set values of variance = 1. and keep mean values as is
        mean_pixels = mean_subtraction_layer.get_weights()[0]
        mean_subtraction_layer.set_weights(
            [mean_pixels, tf.ones_like(mean_pixels)])

        x = mean_subtraction_layer(x)
        x = Rescaling(scale=1 / 255., name='rescaling')(x)
    return x
def create(self, params=None):
    """Apply `params` and build the model, storing it in `self.model`.

    The network is: Input -> Normalization -> Dense(32, relu) -> ReLU ->
    Dense(32, relu) -> Dense(1, sigmoid).
    """
    self.set_params(params)

    inputs = Input(shape=(self.input_shape, ))
    x = Normalization()(inputs)
    x = Dense(32, activation="relu")(x)
    x = ReLU()(x)
    x = Dense(32, activation="relu")(x)
    outputs = Dense(1, activation="sigmoid")(x)

    self.model = Model(inputs=inputs, outputs=outputs, name=self.name)
def __init__(self,
             excel_file_path: str,
             test_size: float = 0.2,
             validation_size: float = 0.33) -> None:
    """Load a dataset via `load_dataset`, split it and build tf.data pipelines.

    Args:
        excel_file_path: Path handed to `load_dataset`.
        test_size: Forwarded as `test_size` to the first `train_test_split`,
            which holds out the test set.
        validation_size: Forwarded as `test_size` to the second
            `train_test_split`, which carves the validation set out of the
            remaining training data.
    """
    # Load the dataset
    loaded = load_dataset(excel_file_path)
    self.x = loaded["data"]
    self.y = loaded["target"]
    self.feature_names = loaded["feature_names"]

    # User-defined constants
    self.num_targets = 1
    self.batch_size = 128

    # Split the dataset: test first, then validation from what is left
    x_rest, x_test, y_rest, y_test = train_test_split(self.x,
                                                      self.y,
                                                      test_size=test_size)
    x_train, x_val, y_train, y_val = train_test_split(
        x_rest, y_rest, test_size=validation_size)

    # Preprocess x data
    self.x_train = x_train.astype(np.float32)
    self.x_test = x_test.astype(np.float32)
    self.x_val = x_val.astype(np.float32)

    # Preprocess y data: one column per target
    target_shape = (-1, self.num_targets)
    self.y_train = np.reshape(y_train, target_shape).astype(np.float32)
    self.y_test = np.reshape(y_test, target_shape).astype(np.float32)
    self.y_val = np.reshape(y_val, target_shape).astype(np.float32)

    # Dataset attributes (note: `self.test_size` is the number of test rows,
    # not the `test_size` argument)
    self.train_size = self.x_train.shape[0]
    self.test_size = self.x_test.shape[0]
    self.num_features = self.x_train.shape[1]
    self.num_targets = self.y_train.shape[1]

    # Normalization layer, adapted on the training inputs only
    self.normalization_layer = Normalization()
    self.normalization_layer.adapt(self.x_train)

    # tf.data Datasets
    from_slices = tf.data.Dataset.from_tensor_slices
    self.train_dataset = from_slices((self.x_train, self.y_train))
    self.test_dataset = from_slices((self.x_test, self.y_test))
    self.val_dataset = from_slices((self.x_val, self.y_val))

    # Dataset preparation; only the training set is shuffled
    self.train_dataset = self._prepare_dataset(self.train_dataset,
                                               shuffle=True)
    self.test_dataset = self._prepare_dataset(self.test_dataset)
    self.val_dataset = self._prepare_dataset(self.val_dataset)
# Vectorise words: a word missing from gloveDict (e.g. one unique to MTG) is
# replaced by an all-zero vector of length `dims`.
inputWords = [[gloveDict.get(w, [0] * dims) for w in t] for t in inputWords]
print("Vectorised oracle text")

# Split data into training and test datasets: entries whose set is "m20" are
# held out as the test set. The mask is computed once and reused.
isTest = [card["set"] == "m20" for card in data]
testNames = [card["name"] for card, held in zip(data, isTest) if held]
trainVecs = [vec for vec, held in zip(inputVecs, isTest) if not held]
testVecs = [vec for vec, held in zip(inputVecs, isTest) if held]
trainWords = tf.ragged.constant(
    [words for words, held in zip(inputWords, isTest) if not held])
testWords = tf.ragged.constant(
    [words for words, held in zip(inputWords, isTest) if held])
trainCorRars = [rar for rar, held in zip(corRars, isTest) if not held]
testCorRars = [rar for rar, held in zip(corRars, isTest) if held]

# Normalise input vectors. Statistics come from the training split only; both
# splits are converted to float32 numpy arrays before being normalised.
trainVecs = np.array(trainVecs).astype("float32")
testVecs = np.array(testVecs).astype("float32")
normalizer = Normalization()
normalizer.adapt(trainVecs)
trainVecs = normalizer(trainVecs)
testVecs = normalizer(testVecs)
print("Normalised numerical data")

# Build keras model
wordIn = layers.Input(shape = (None, len(inputWords[0][0])))  # input layer for var-length word vec data
numIn = layers.Input(shape = (len(inputVecs[0]), ))  # input layer for fixed-length numerical data
rnn = layers.LSTM(32)(wordIn)  # RNN layer for word vec data
numLayer = layers.Dense(20, activation = "relu")(numIn)  # layer for numerical data
merge = layers.concatenate([rnn, numLayer])  # combine the two vectors
combLayer = layers.Dense(64, activation = "relu")(merge)  # hidden intermediate layer for combined data
out = layers.Dense(3, activation = "softmax")(combLayer)  # final layer: softmax ensures output is a set of probabilities
model = DualModel(inputs = [numIn, wordIn], outputs = [out])
# NOTE(review): loss, metric and optimizer were picked without tuning (per
# the original author's remark) - revisit if results are poor.
model.compile(loss = "sparse_categorical_crossentropy",
              metrics = ["sparse_categorical_accuracy"],
              optimizer = "adam")
def __init__(self):
    """Initialise the parent class with a fresh `Normalization` layer."""
    layer = Normalization()
    super().__init__(layer)
# in the `adapt()` data. Unknown n-grams are encoded via an "out-of-vocabulary" # token. integer_data = vectorizer(training_data) print(integer_data) """ **Example: normalizing features** """ from tensorflow.keras.layers.experimental.preprocessing import Normalization # Example image data, with values in the [0, 255] range training_data = np.random.randint(0, 256, size=(64, 200, 200, 3)).astype("float32") normalizer = Normalization(axis=-1) normalizer.adapt(training_data) normalized_data = normalizer(training_data) print("var: %.4f" % np.var(normalized_data)) print("mean: %.4f" % np.mean(normalized_data)) """ **Example: rescaling & center-cropping images** Both the `Rescaling` layer and the `CenterCrop` layer are stateless, so it isn't necessary to call `adapt()` in this case. """ from tensorflow.keras.layers.experimental.preprocessing import CenterCrop from tensorflow.keras.layers.experimental.preprocessing import Rescaling
model.fit(x_train, y_train, epochs = 1, validation_split=0.1, batch_size=16) """ In general, it's a good practice to develop models that take raw data as input, as opposed to models that take already-preprocessed data. The reason being that, if your model expects preprocessed data, any time you export your model to use it elsewhere (in a web browser, in a mobile app), you'll need to reimplement the same exact preprocessing pipeline. This can be a bit tricky to do. """ # normalize in range [0, 1] scaling_layer = Rescaling(1.0 / 255) # normalize in range [-1, 1] input_ = tf.keras.Input(shape=(32, 32, 3)) norm_neg_one_to_one = Normalization() x = norm_neg_one_to_one(input_) import numpy as np mean = [127.5]*3 var = mean ** 2 norm_neg_one_to_one.set_weights([mean, var]) norm_neg_one_to_one.get_weights() # normalize with mean 0 and std 1 norm_mean_std = Normalization() norm_mean_std.adapt(x_train[0]) model_ = Sequential([ tf.keras.Input(shape=(32, 32, 3)), norm_mean_std, model
# Random binary targets of shape (6, 4)
dept_targets = np.random.randint(2, size=(6, 4))
print(dept_targets)

# Build an image dataset from a directory and print the shape and dtype of
# every batch of images and labels.
dataset = keras.preprocessing.image_dataset_from_directory('mian_directory', batch_size=64, image_size=(2000, 2000))
for data, labels in dataset:
    print(data.shape)
    print(data.dtype)
    print(labels.shape)
    print(labels.dtype)

# Normalization
trianing_data = np.random.randint(0, 256, size=(64, 200, 200, 3)).astype("float32")
normalizetion = Normalization(axis=-1)  # normalize along the last axis
normalizetion.adapt(trianing_data)
normalizetion_data = normalizetion(trianing_data)
print("var:%.4f" % np.var(normalizetion_data))  # np.var computes the variance
print("mean:%.4f" % np.mean(normalizetion_data))

# Rescale and center-crop images
cropper = CenterCrop(height=150, width=150)
scaler = Rescaling(scale=1.0 / 255)
output_data = scaler(cropper(
    trianing_data))  # Both the Rescaling and CenterCrop layers are stateless, so calling adapt() is not necessary here.
print(output_data.shape)
print("min:", np.min(output_data))

# Build a model with the Keras Functional API
dense = keras.layers.Dense(units=16)  # maps its input to a 16-dimensional feature space
class CALIHOUSING:
    """California housing data split into train/validation/test pipelines.

    On construction the data is fetched, split, cast to float32, and wrapped
    in batched `tf.data` datasets whose inputs are normalized by a layer
    adapted on the training inputs.
    """

    def __init__(self,
                 test_size: float = 0.2,
                 validation_size: float = 0.33) -> None:
        """Load, split and prepare the data.

        Args:
            test_size: Forwarded as `test_size` to the first
                `train_test_split`, which holds out the test set.
            validation_size: Forwarded as `test_size` to the second
                `train_test_split`, which carves the validation set out of
                the remaining training data.
        """
        # User-defined constants
        self.num_targets = 1
        self.batch_size = 128

        # Load the data set
        housing = fetch_california_housing()
        self.x = housing.data
        self.y = housing.target
        self.feature_names = housing.feature_names
        self.description = housing.DESCR

        # Split the dataset: test first, then validation from what is left
        x_rest, x_test, y_rest, y_test = train_test_split(
            self.x, self.y, test_size=test_size)
        x_train, x_val, y_train, y_val = train_test_split(
            x_rest, y_rest, test_size=validation_size)

        # Preprocess x data
        self.x_train = x_train.astype(np.float32)
        self.x_test = x_test.astype(np.float32)
        self.x_val = x_val.astype(np.float32)

        # Preprocess y data: one column per target
        target_shape = (-1, self.num_targets)
        self.y_train = np.reshape(y_train, target_shape).astype(np.float32)
        self.y_test = np.reshape(y_test, target_shape).astype(np.float32)
        self.y_val = np.reshape(y_val, target_shape).astype(np.float32)

        # Dataset attributes (note: `self.test_size` is the number of test
        # rows, not the `test_size` argument)
        self.train_size = self.x_train.shape[0]
        self.test_size = self.x_test.shape[0]
        self.num_features = self.x_train.shape[1]
        self.num_targets = self.y_train.shape[1]

        # Normalization layer, adapted on the training inputs only
        self.normalization_layer = Normalization()
        self.normalization_layer.adapt(self.x_train)

        # tf.data Datasets
        from_slices = tf.data.Dataset.from_tensor_slices
        self.train_dataset = from_slices((self.x_train, self.y_train))
        self.test_dataset = from_slices((self.x_test, self.y_test))
        self.val_dataset = from_slices((self.x_val, self.y_val))

        # Dataset preparation; only the training set is shuffled
        self.train_dataset = self._prepare_dataset(self.train_dataset,
                                                   shuffle=True)
        self.test_dataset = self._prepare_dataset(self.test_dataset)
        self.val_dataset = self._prepare_dataset(self.val_dataset)

    def get_train_set(self) -> tf.data.Dataset:
        """Return the prepared training dataset."""
        return self.train_dataset

    def get_test_set(self) -> tf.data.Dataset:
        """Return the prepared test dataset."""
        return self.test_dataset

    def get_val_set(self) -> tf.data.Dataset:
        """Return the prepared validation dataset."""
        return self.val_dataset

    def _prepare_dataset(self,
                         dataset: tf.data.Dataset,
                         shuffle: bool = False) -> tf.data.Dataset:
        """Normalize, optionally shuffle, batch and prefetch `dataset`."""

        def _normalize(x, y):
            # The layer is applied to a single row reshaped to
            # (1, num_features); the result is flattened back afterwards.
            row = tf.reshape(x, shape=(1, self.num_features))
            scaled = self.normalization_layer(row, training=False)
            return tf.reshape(scaled, shape=(self.num_features, )), y

        autotune = tf.data.experimental.AUTOTUNE
        dataset = dataset.map(map_func=_normalize,
                              num_parallel_calls=autotune)
        if shuffle:
            dataset = dataset.shuffle(buffer_size=1_000)
        dataset = dataset.batch(batch_size=self.batch_size)
        return dataset.prefetch(buffer_size=autotune)
def __init__(self, data=None, axis=-1):
    """Forward `data` to the parent and create the normalization processor.

    typically the last axis is the one we normalize over
    """
    super().__init__(data=data)
    layer = Normalization(axis=axis)
    self.processor = layer