def train_model_without_augmentation():
    """
    Train a small convnet on the reduced dataset produced by mk_smaller_dataset() and plot the training and
    validation accuracy/loss once training has finished.

    The trained model is saved to the hard-coded path
    'C:\\Datasets\\dogs-vs-cats\\cats_and_dogs_small\\cats_and_dogs_small_1.h5'.

    :return: None
    """
    # Four Conv2D/MaxPooling2D stages followed by a dense classifier with a single sigmoid output.
    model = models.Sequential()
    model.add(
        layers.Conv2D(32, (3, 3), activation='relu',
                      input_shape=(150, 150, 3)))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Conv2D(64, (3, 3), activation='relu'))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Conv2D(128, (3, 3), activation='relu'))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Conv2D(128, (3, 3), activation='relu'))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Flatten())
    model.add(layers.Dense(512, activation='relu'))
    model.add(layers.Dense(1, activation='sigmoid'))
    model.summary()

    # `learning_rate` is the supported spelling of the optimizer argument; the old `lr` alias is deprecated.
    model.compile(loss='binary_crossentropy',
                  optimizer=optimizers.RMSprop(learning_rate=1e-4),
                  metrics=['acc'])

    # Generators that rescale pixel values by 1/255 before feeding the network.
    train_datagen = ImageDataGenerator(rescale=1. / 255)
    test_datagen = ImageDataGenerator(rescale=1. / 255)

    # One generator for training and one for validation
    train_generator = train_datagen.flow_from_directory(
        train_dir,
        target_size=(150, 150),
        batch_size=20,
        class_mode='binary')
    validation_generator = test_datagen.flow_from_directory(
        validation_dir,
        target_size=(150, 150),
        batch_size=20,
        class_mode='binary')

    # fit() accepts generators directly; fit_generator() is deprecated in favour of it.
    history = model.fit(train_generator,
                        steps_per_epoch=100,
                        epochs=30,
                        validation_data=validation_generator,
                        validation_steps=50)

    # Save the trained model and plot the training and validation performance
    model.save(
        filepath=
        'C:\\Datasets\\dogs-vs-cats\\cats_and_dogs_small\\cats_and_dogs_small_1.h5'
    )

    acc = history.history['acc']
    val_acc = history.history['val_acc']
    loss = history.history['loss']
    val_loss = history.history['val_loss']

    epochs = range(1, len(acc) + 1)

    plt.plot(epochs, acc, 'bo', label='Training acc')
    plt.plot(epochs, val_acc, 'b', label='Validation acc')
    plt.title('Training and Validation Accuracy vs. Epoch #')
    plt.legend()

    # Second figure for the loss curves.
    plt.figure()
    plt.plot(epochs, loss, 'bo', label='Training loss')
    plt.plot(epochs, val_loss, 'b', label='Validation loss')
    plt.title('Training and Validation Loss vs. Epoch #')
    plt.legend()

    plt.show()
batch_size=batch_size) # Verify class names: class_names = train_ds.class_names print(class_names) # Create neural network: model = tf.keras.models.Sequential([ layers.BatchNormalization(), layers.Conv2D(32, 3, activation='relu'), layers.MaxPooling2D(), layers.Conv2D(64, 3, activation='relu'), layers.MaxPooling2D(), layers.Conv2D(128, 3, activation='relu'), layers.MaxPooling2D(), layers.Flatten(), layers.Dense(256, activation='relu'), layers.Dense(len(class_names), activation='softmax') ]) # Compile Model: model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy']) # Fit with 15 epochs: fitted = model.fit(training_data=train_ds, validation_data=val_ds, epochs=15) # Plot accuracy metrics from training: plt.plot(fitted.history['loss'], label='loss') plt.plot(fitted.history['accuracy'], label='accuracy')
def build_model(
        input_shape,
        num_classes,
        activation_function,
        dropout_rate,
        use_batchnorm,
        l2_regularization,
        cnn_layers,
        lstm_units,
        combine_mode,
        fcn_layers):
    '''
    Builds a CNN-RNN-FCN classification model

    # Parameters
        input_shape (tuple or list) -- expected input shape. A 3-element shape
            is treated as a single image and gets a leading sequence axis of
            length 1; shapes with fewer than 3 elements are rejected.
        num_classes (int) -- number of classes
        activation_function (str) -- non linearity to apply between layers
        dropout_rate (float) -- must be between 0 and 1
        use_batchnorm (bool) -- if True, batchnorm layers are added between
            convolutions
        l2_regularization (float)
        cnn_layers (list) -- list specifying CNN layers.
            Each element must be of the form
            {filters: 32, kernel_size: 3, use_maxpool: true}
        lstm_units (int) -- number of hidden units of the lstm
            if lstm_units is None or not positive the LSTM layer is skipped
        combine_mode (str) -- specifies how the encoding of each image in the
            sequence is to be combined. Supports:
                concat : outputs are stacked on top of one another
                last : only last hidden state is returned
                attention : an attention mechanism is used to combine the
                    hidden states
        fcn_layers (list) -- list specifying Dense layers
            example element: {units: 1024}

    # Returns
        model -- an uncompiled Keras model

    # Raises
        ValueError -- if input_shape has fewer than 3 dimensions or
            combine_mode is not one of the supported values
    '''
    # Validate the arguments up front so that a bad call fails before any
    # layer has been created.
    if len(input_shape) < 3:
        raise ValueError(f"Input shape {input_shape} not supported")
    if combine_mode not in ('concat', 'last', 'attention'):
        raise ValueError(f"Combine mode {combine_mode} not supported")

    use_lstm = lstm_units is not None and lstm_units > 0

    # Regularizer
    l2_reg = l2(l2_regularization)

    # Build a model with the functional API
    inputs = ll.Input(input_shape)
    x = inputs

    # Reshape entry if needed: prepend a sequence axis of length 1.
    # (1, *input_shape) works for tuples and lists alike, whereas
    # `[1] + input_shape` raised TypeError for the documented tuple input.
    if len(input_shape) == 3:
        x = ll.Reshape((1, *input_shape))(x)

    # CNN feature extractor, applied to every element of the sequence
    for i, cnn_layer in enumerate(cnn_layers):
        # Extract layer params
        filters = cnn_layer['filters']
        kernel_size = cnn_layer['kernel_size']
        use_maxpool = cnn_layer['use_maxpool']

        # build cnn_layer
        x = ll.TimeDistributed(ll.Conv2D(
            filters,
            kernel_size,
            strides=(1, 1),
            padding='same',
            data_format=None,
            dilation_rate=(1, 1),
            activation=activation_function,
            use_bias=True,
            kernel_initializer='glorot_uniform',
            bias_initializer='zeros',
            kernel_regularizer=l2_reg,
            bias_regularizer=l2_reg,
            activity_regularizer=None,
            kernel_constraint=None,
            bias_constraint=None
        ), name=f'conv2D_{i}')(x)

        # add maxpool if needed
        if use_maxpool:
            x = ll.TimeDistributed(ll.MaxPooling2D(
                pool_size=(2, 2),
                strides=None,
                padding='valid',
                data_format=None
            ), name=f'maxpool_{i}')(x)

        if use_batchnorm:
            x = ll.TimeDistributed(ll.BatchNormalization(
                axis=-1,
                momentum=0.99,
                epsilon=0.001,
                center=True,
                scale=True,
                beta_initializer='zeros',
                gamma_initializer='ones',
                moving_mean_initializer='zeros',
                moving_variance_initializer='ones',
                beta_regularizer=None,
                gamma_regularizer=None,
                beta_constraint=None,
                gamma_constraint=None
            ), name=f'batchnorm_{i}')(x)

    x = ll.TimeDistributed(ll.Flatten(), name='flatten')(x)
    x = ll.TimeDistributed(ll.Dropout(dropout_rate), name='dropout')(x)

    # LSTM feature combinator
    if use_lstm:
        x = ll.CuDNNLSTM(
            lstm_units,
            kernel_initializer='glorot_uniform',
            recurrent_initializer='orthogonal',
            bias_initializer='zeros',
            unit_forget_bias=True,
            kernel_regularizer=l2_reg,
            recurrent_regularizer=l2_reg,
            bias_regularizer=l2_reg,
            activity_regularizer=None,
            kernel_constraint=None,
            recurrent_constraint=None,
            bias_constraint=None,
            # with 'last' the LSTM itself returns only its final output
            return_sequences=(combine_mode != 'last'),
            return_state=False,
            go_backwards=False,
            stateful=False
        )(x)

    # Combine output of each sequence
    if combine_mode == 'concat':
        x = ll.Flatten()(x)
    elif combine_mode == 'last':
        if not use_lstm:
            # no LSTM was used, so extract the last element of the sequence
            x = ll.Lambda(lambda seq: seq[:, -1, ...])(x)
    elif combine_mode == 'attention':
        # one score per sequence element, softmax-normalised, then used as
        # weights in a dot product over the sequence axis
        attention = ll.TimeDistributed(ll.Dense(1), name='attention_score')(x)
        attention = ll.Flatten()(attention)
        attention = ll.Softmax()(attention)
        x = ll.dot([x, attention], axes=[-2, -1])

    # FCN classifier
    for fcn_layer in fcn_layers:
        # extract layer params
        units = fcn_layer['units']

        # build layer
        x = ll.Dense(
            units,
            activation=activation_function,
            use_bias=True,
            kernel_initializer='glorot_uniform',
            bias_initializer='zeros',
            kernel_regularizer=l2_reg,
            bias_regularizer=l2_reg,
            activity_regularizer=None,
            kernel_constraint=None,
            bias_constraint=None
        )(x)
        x = ll.Dropout(dropout_rate)(x)

    prediction = ll.Dense(num_classes, activation='softmax')(x)

    # Build model
    model = Model(inputs=inputs, outputs=prediction)
    return model
def __init__(self, num_classes=10):
    """Create the layers of the network: five conv blocks and a dense head.

    Blocks 1-2 hold two 3x3 'same'-padded ReLU convolutions each, blocks 3-5
    hold three; every block ends with a 2x2 max-pool of stride 2. The head is
    Flatten -> Dense(4096) -> Dropout(0.5) -> Dense(4096) -> Dropout(0.5) ->
    Dense(num_classes) with no activation on the final layer.

    Args:
        num_classes: number of units of the final dense layer.
    """
    super(VGG, self).__init__(name='VGG')
    self.num_classes = num_classes

    def conv3x3(filters, name):
        # 3x3 convolution, 'same' padding, ReLU activation
        return layers.Conv2D(filters=filters,
                             kernel_size=[3, 3],
                             padding="same",
                             activation='relu',
                             name=name)

    def pool2x2(name):
        # 2x2 max-pooling with stride 2
        return layers.MaxPool2D(pool_size=(2, 2), strides=2, name=name)

    # Block 1 (64 filters)
    self.block1_conv1 = conv3x3(64, 'block1_conv1')
    self.block1_conv2 = conv3x3(64, 'block1_conv2')
    self.block1_pool = pool2x2('block1_pool')

    # Block 2 (128 filters)
    self.block2_conv1 = conv3x3(128, 'block2_conv1')
    self.block2_conv2 = conv3x3(128, 'block2_conv2')
    self.block2_pool = pool2x2('block2_pool')

    # Block 3 (256 filters)
    self.block3_conv1 = conv3x3(256, 'block3_conv1')
    self.block3_conv2 = conv3x3(256, 'block3_conv2')
    self.block3_conv3 = conv3x3(256, 'block3_conv3')
    self.block3_pool = pool2x2('block3_pool')

    # Block 4 (512 filters)
    self.block4_conv1 = conv3x3(512, 'block4_conv1')
    self.block4_conv2 = conv3x3(512, 'block4_conv2')
    self.block4_conv3 = conv3x3(512, 'block4_conv3')
    self.block4_pool = pool2x2('block4_pool')

    # Block 5 (512 filters)
    self.block5_conv1 = conv3x3(512, 'block5_conv1')
    self.block5_conv2 = conv3x3(512, 'block5_conv2')
    self.block5_conv3 = conv3x3(512, 'block5_conv3')
    self.block5_pool = pool2x2('block5_pool')

    # Classifier head
    self.flat = layers.Flatten()
    self.fc1 = layers.Dense(4096, activation='relu', name='fc1')
    self.dropout1 = layers.Dropout(rate=0.5)
    self.fc2 = layers.Dense(4096, activation='relu', name='fc2')
    self.dropout2 = layers.Dropout(rate=0.5)
    self.final_fc = layers.Dense(num_classes)
def build_DCAE(m, img_size=(64, 64)):
    """Build the (uncompiled) convolutional auto-encoder named "DCAE".

    The input has shape ``img_size + (2 * m,)``. The encoder is three
    Conv2D(relu)/MaxPooling2D stages with 32, 64 and 128 filters followed by
    a 32-filter convolution whose output is flattened by the layer named
    'encoded'. The decoder reshapes that vector back to the feature-map
    shape, applies three UpSampling2D/Conv2D(relu) stages with 128, 64 and
    32 filters and ends with a tanh convolution named 'decoded' that has
    ``2 * m`` output channels.

    Args:
        m: the network reads and writes ``2 * m`` channels.
        img_size: tuple with the two spatial dimensions of the input.

    Returns:
        A ``keras.Model`` mapping the input to the 'decoded' output.
    """
    n_channels = 2 * m

    def conv_relu(filters):
        # 3x3 'same' ReLU convolution with bias; the L2 factor is 0, as in
        # every convolution of this network except the output one.
        return layers.Conv2D(filters,
                             3,
                             padding='same',
                             activation="relu",
                             use_bias=True,
                             kernel_regularizer=keras.regularizers.l2(0))

    net_input = keras.Input(shape=img_size + (n_channels, ))
    h = net_input

    # Encoder
    for filters in (32, 64, 128):
        h = conv_relu(filters)(h)
        h = layers.MaxPooling2D(2, padding='same')(h)
    h = conv_relu(32)(h)

    # Bottleneck: flatten, then restore the feature-map shape for the decoder
    bottleneck_shape = h.shape[1:]
    h = layers.Flatten(name='encoded')(h)
    h = layers.Reshape(bottleneck_shape)(h)

    # Decoder
    for filters in (128, 64, 32):
        h = layers.UpSampling2D(size=(2, 2))(h)
        h = conv_relu(filters)(h)

    decoded = layers.Conv2D(n_channels,
                            3,
                            padding='same',
                            activation="tanh",
                            use_bias=True,
                            name='decoded')(h)
    return keras.Model(net_input, decoded, name="DCAE")
# Multi-layer Perceptron (MLP) with 2 hidden layers, built from a list of layers:
# 1 - Flatten turns every (28, 28) input example into a 1D vector. It is the
#     first layer, so it also declares the shape of one input example
#     through `input_shape`.
# 2 - Two dense hidden layers with 300 and 100 neurons and ReLU activation.
#     Each dense layer manages its own weight matrix, with all the connection
#     weights between the neurons and their inputs, plus a vector of bias
#     terms (one per neuron).
# 3 - A dense output layer with 10 neurons, one per class, using the softmax
#     activation function because the classes are exclusive.
# Note: alternatively, you could start from an empty keras.models.Sequential()
# and append the same layers one by one with model.add(layer).
model = keras.models.Sequential([
    layers.Flatten(input_shape=[28, 28]),  # 1
    layers.Dense(300, activation=activations.relu),  # 2
    layers.Dense(100, activation=activations.relu),  # 2
    layers.Dense(10, activation=activations.softmax),  # 3
])

model.summary()

# Not working, complains with this error, although everything is installed:
# ('Failed to import pydot. You must `pip install pydot` and install graphviz
# (https://graphviz.gitlab.io/download/), ', 'for `pydotprint` to work.')
# keras.utils.plot_model(model)
pre_trained_model.load_weights(local_weights_file)

# Freeze every layer of the pre-trained network so only the new head is trained.
for layer in pre_trained_model.layers:
    layer.trainable = False

# pre_trained_model.summary()

# Use the output of the 'mixed7' layer as the feature extractor output.
last_layer = pre_trained_model.get_layer('mixed7')
print('last layer output shape: ', last_layer.output_shape)
last_output = last_layer.output

from tensorflow.keras.optimizers import RMSprop

# Flatten the output layer to 1 dimension
x = layers.Flatten()(last_output)
# Add a fully connected layer with 1,024 hidden units and ReLU activation
x = layers.Dense(1024, activation='relu')(x)
# Add a dropout rate of 0.2
x = layers.Dropout(0.2)(x)
# Add a final sigmoid layer for classification
x = layers.Dense(1, activation='sigmoid')(x)

model = Model(pre_trained_model.input, x)

# `learning_rate` replaces the deprecated `lr` argument of tf.keras optimizers.
model.compile(optimizer=RMSprop(learning_rate=0.0001),
              loss='binary_crossentropy',
              metrics=['accuracy'])

from tensorflow.keras.preprocessing.image import ImageDataGenerator
# Scale pixel values by 1/255.
train_images = train_images / 255.0

from tensorflow.keras import layers
from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras.models import Sequential, Model, load_model

img_input = layers.Input(shape=(224, 224, 3))
# NOTE(review): ResNet50 is created with its default classification top, so the
# dense layers below are stacked on that top's output rather than on
# convolutional features, and `img_input` is not used in this fragment --
# confirm whether `include_top=False` / `input_tensor=img_input` was intended.
base_model = ResNet50(weights='imagenet')

#Prepare DNN model
model = tf.keras.models.Sequential()
model.add(base_model)
# NOTE(review): `input_shape` on a layer that is not the first in the model looks
# redundant -- confirm and drop.
model.add(layers.Flatten(input_shape=(224, 224)))
model.add(layers.Dense(units=128, activation='relu'))
#model.add(layers.Dropout(0.5))
model.add(layers.Dense(units=64, activation='relu'))
model.add(layers.Dense(units=10, activation='softmax'))

# `learning_rate` replaces the deprecated `lr` argument of tf.keras optimizers.
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
              loss=tf.keras.losses.CategoricalCrossentropy(),
              metrics=['accuracy'])

#Train simple DNN; the last 20% of the training data is held out for validation.
history = model.fit(train_images,
                    train_labels,
                    batch_size=32,
                    epochs=60,
                    validation_split=0.2)
# Build the CNN: a 22-value spectrum goes in, a single linear value comes out.
input_spec = layers.Input(shape=(22, ))
# Add a trailing axis of size 1 so the Conv1D layers can be applied.
MidLayer = layers.Reshape((22, 1))(input_spec)

# Two 'same'-padded ReLU convolutions: (filters, kernel_size) per layer.
for n_filters, kernel_width in ((35, 12), (60, 6)):
    MidLayer = layers.Conv1D(filters=n_filters,
                             kernel_size=kernel_width,
                             padding="same",
                             activation='relu')(MidLayer)

# Max pooling, then flatten for the dense layers.
MidLayer = layers.MaxPooling1D(pool_size=8)(MidLayer)
MidLayer = layers.Flatten()(MidLayer)

# Dense hidden layer followed by the linear output layer.
MidLayer = layers.Dense(units=30, activation='relu')(MidLayer)
output_label = layers.Dense(units=1, activation="linear",
                            input_dim=30)(MidLayer)

model = keras.models.Model(input_spec, output_label)

# Early stopping on the 'mean_absolute_error' quantity with a patience of 250.
earlystop = EarlyStopping(monitor='mean_absolute_error', patience=250)
callbacks_list = [earlystop]

optimizer = tf.train.RMSPropOptimizer(0.001)

model.compile(loss='mse', optimizer=optimizer, metrics=['mae', 'mse'])
# NOTE(review): these imports mix `tensorflow.keras` with the standalone `keras`
# package -- confirm that both are meant to be used side by side.
from tensorflow.keras.layers import Dense, Flatten, Conv2D, BatchNormalization, Dropout, MaxPooling2D, Activation
from keras.models import Model
from keras import Sequential
#from metadata import set_data #NUM_CLASSES defined in metadata

# Class labels; 'disgust' is currently commented out of the list.
CLASSES = ['angry', 'fear', 'happy', 'neutral', 'sad', 'surprise'] #, 'disgust']
NUM_CLASSES = len(CLASSES)
# Shape of one input example, as passed to keras.Input below.
input_shape = (48, 48, 1)
#%%
# Fully connected baseline: flatten the input, three sigmoid dense layers
# (4000, 1000, 100 units) and a softmax output with one unit per class.
mlp_all = keras.Sequential([
    keras.Input(shape=input_shape),
    layers.Flatten(),
    layers.Dense(4000, activation="sigmoid"),
    #layers.Dropout(0.2),
    layers.Dense(1000, activation="sigmoid"),
    #layers.Dropout(0.2),
    layers.Dense(100, activation="sigmoid"),
    layers.Dense(NUM_CLASSES, activation="softmax"),
])
mlp_all.compile(
    loss="categorical_crossentropy",
    optimizer="adam", #keras.optimizers.Adam(learning_rate=0.01),
    metrics=["accuracy"])
# Convolutional model; its definition continues beyond this fragment.
conv_all = keras.Sequential([
    keras.Input(shape=input_shape),
# x = layers.Flatten()(x) # x = layers.Dense(1024, activation="relu")(x) # this layer further improves perf # x = layers.Dropout(.2)(x) # outputs = layers.Dense(120, activation= 'sigmoid')(x) # model = keras.Model(inputs, outputs) # (C) CNN model (~2 conv layers): ~ 230k paras inputs = keras.Input(shape=(12, 120, 1)) # 1 is needed here to keep the same dim with next conv2D layer x = layers.experimental.preprocessing.Rescaling(1.0 / 255)(inputs) x = layers.Conv2D(filters=8, kernel_size=(3, 3), padding='same', activation="relu")(x) x = layers.MaxPooling2D(pool_size=(3, 3), strides=(1, 1),padding='same')(x) # stride =1, not =3 be default x = layers.Dropout(.2)(x) x = layers.Conv2D(filters=4, kernel_size=(3, 3), padding='same', activation="relu")(x) x = layers.MaxPooling2D(pool_size=(2, 2), strides=(1, 1), padding='same')(x) # stride =1, not =2 be default x = layers.Dropout(.2)(x) x = layers.Flatten()(x) # x = layers.Dense(1024, activation="relu")(x) # it does not further improve! outputs = layers.Dense(120, activation= 'sigmoid')(x) model = keras.Model(inputs, outputs) # show model model.summary() #%% model.compile loss = tf.keras.losses.BinaryCrossentropy() # optimizer = tf.keras.optimizers.SGD(learning_rate=0.01) optimizer = tf.keras.optimizers.Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-07) METRICS = [
# Three parallel convolution branches over the embedding, with filter widths
# 3, 4 and 5 (100 filters each).
conv_3_layer = layers.Conv1D(100, 3, activation='relu',
                             name="filter_size_3")(embedding_layer)
conv_4_layer = layers.Conv1D(100, 4, activation='relu',
                             name="filter_size_4")(embedding_layer)
conv_5_layer = layers.Conv1D(100, 5, activation='relu',
                             name="filter_size_5")(embedding_layer)

# One max-pool per branch; the pool sizes shrink as the filter width grows.
max_pool_3_layer = layers.MaxPool1D(pool_size=52,
                                    name="max_pool_3",
                                    padding="same")(conv_3_layer)
max_pool_4_layer = layers.MaxPool1D(pool_size=51,
                                    name="max_pool_4",
                                    padding="same")(conv_4_layer)
max_pool_5_layer = layers.MaxPool1D(pool_size=50,
                                    name="max_pool_5",
                                    padding="same")(conv_5_layer)

flatten_3_layer = layers.Flatten()(max_pool_3_layer)
flatten_4_layer = layers.Flatten()(max_pool_4_layer)
flatten_5_layer = layers.Flatten()(max_pool_5_layer)

# Merge the three branches, regularise with dropout, classify into 2 classes.
concatenate_layer = layers.concatenate(
    [flatten_3_layer, flatten_4_layer, flatten_5_layer])
dropout_layer = layers.Dropout(rate=0.5)(concatenate_layer)
outputs = layers.Dense(2, activation="softmax")(dropout_layer)

model = keras.Model(inputs=inputs, outputs=outputs, name="test_model")
keras.utils.plot_model(model, "my_first_model.png", show_shapes=True)

# The output layer already applies softmax, so the loss receives probabilities,
# not logits: from_logits must be False (True would squash the probabilities
# through a second sigmoid before computing the loss).
model.compile(optimizer='adam',
              loss=tf.keras.losses.BinaryCrossentropy(from_logits=False),
              metrics=['accuracy'])

train_data = load_text.train_data
def mnist_conv_net():
    """
    Build a small convnet, train it on MNIST for 5 epochs and print the accuracy reached on the test set.

    Progress and dataset summaries are printed along the way.

    :return: None
    """

    def describe_images(lead_in):
        # Print the size, per-image shape and dtype of both splits. The second line now correctly says
        # "test images" (it used to repeat "training images").
        print(
            f"{lead_in} {len(train_images)} training images, each of shape {train_images[0].shape}: dtype = "
            f"{train_images[0].dtype},")
        print(
            f"and {len(test_images)} test images, each of shape {test_images[0].shape}: dtype = "
            f"{test_images[0].dtype}\n")

    print("Let's first take a look at what a typical convnet looks like:")
    model = models.Sequential()
    model.add(
        layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Conv2D(64, (3, 3), activation='relu'))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Conv2D(64, (3, 3), activation='relu'))
    model.add(layers.Flatten())
    model.add(layers.Dense(64, activation='relu'))
    model.add(layers.Dense(10, activation='softmax'))
    model.compile(optimizer='rmsprop',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    model.summary()

    # The two literals are concatenated, so the first one must end with a space.
    print(
        "This particular model is actually quite useful for the mnist dataset we encountered in Chapter 2. As before, "
        "we'll normalise the inputs and categorise the outputs.")

    # Load data and print useful information
    (train_images, train_labels), (test_images,
                                   test_labels) = mnist.load_data()
    describe_images("There are")

    # Reshape to (n, 28, 28, 1) and scale to float32 values in [0, 1] for use in the neural network
    print('Reshaping and normalising inputs:')
    train_images = train_images.reshape((len(train_images), 28, 28, 1))
    train_images = train_images.astype('float32') / 255
    test_images = test_images.reshape((len(test_images), 28, 28, 1))
    test_images = test_images.astype('float32') / 255
    describe_images("Now there are")

    # Convert the outputs to categorical labels, as required by the categorical_crossentropy loss
    print('Converting the outputs to categorical labels:')
    train_labels = to_categorical(train_labels)
    test_labels = to_categorical(test_labels)

    # Train the model and evaluate the performance
    print(
        "\nNow let's train and evaluate the model to see how it performs on the 1st try:"
    )
    model.fit(train_images, train_labels, epochs=5, batch_size=64)
    test_loss, test_acc = model.evaluate(test_images, test_labels)
    print(
        f"Test Accuracy = {test_acc * 100}%\nNote the immediate rise in test accuracy. This fairly naive network "
        f"can easily outperform simple Dense networks due to convnets' inherent resistance to translational and spatial "
        f"variations. But what more can convnets do?")
def train_model_with_augmentation():
    """
    Train a convnet with Dropout on the reduced dataset produced by mk_smaller_dataset(), using data augmentation
    for the training images, and plot the training and validation accuracy/loss once training has finished.

    The trained model is saved to the hard-coded path
    'C:\\Datasets\\dogs-vs-cats\\cats_and_dogs_small\\cats_and_dogs_small_2.h5'.

    :return: None
    """
    # Same convolutional stack as the non-augmented model, with a Dropout layer before the dense classifier.
    model = models.Sequential()
    model.add(
        layers.Conv2D(32, (3, 3), activation='relu',
                      input_shape=(150, 150, 3)))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Conv2D(64, (3, 3), activation='relu'))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Conv2D(128, (3, 3), activation='relu'))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Conv2D(128, (3, 3), activation='relu'))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Flatten())
    model.add(layers.Dropout(0.5))
    model.add(layers.Dense(512, activation='relu'))
    model.add(layers.Dense(1, activation='sigmoid'))
    model.summary()

    # `learning_rate` is the supported spelling of the optimizer argument; the old `lr` alias is deprecated.
    model.compile(loss='binary_crossentropy',
                  optimizer=optimizers.RMSprop(learning_rate=1e-4),
                  metrics=['acc'])

    # Create generators for both training and validation. Only the training generator augments its images;
    # the validation generator just rescales them.
    train_datagen = ImageDataGenerator(rescale=1. / 255,
                                       rotation_range=40,
                                       width_shift_range=0.2,
                                       height_shift_range=0.2,
                                       shear_range=0.2,
                                       zoom_range=0.2,
                                       horizontal_flip=True)
    test_datagen = ImageDataGenerator(rescale=1. / 255)

    train_generator = train_datagen.flow_from_directory(
        train_dir,
        target_size=(150, 150),
        batch_size=20,
        class_mode='binary')
    validation_generator = test_datagen.flow_from_directory(
        validation_dir,
        target_size=(150, 150),
        batch_size=20,
        class_mode='binary')

    # Train the model. More epochs will be needed this time for 2 reasons:
    #   -> Dropout will greatly increase time to convergence
    #   -> Augmentation results in effectively infinite input variation.
    # fit() accepts generators directly; fit_generator() is deprecated in favour of it.
    history = model.fit(train_generator,
                        steps_per_epoch=100,
                        epochs=100,
                        validation_data=validation_generator,
                        validation_steps=50)

    # Save the model and plot the training and validation performance
    model.save(
        filepath=
        'C:\\Datasets\\dogs-vs-cats\\cats_and_dogs_small\\cats_and_dogs_small_2.h5'
    )

    acc = history.history['acc']
    val_acc = history.history['val_acc']
    loss = history.history['loss']
    val_loss = history.history['val_loss']

    epochs = range(1, len(acc) + 1)

    plt.plot(epochs, acc, 'bo', label='Training acc')
    plt.plot(epochs, val_acc, 'b', label='Validation acc')
    plt.title('Training and Validation Accuracy vs. Epoch #')
    plt.legend()

    # Second figure for the loss curves.
    plt.figure()
    plt.plot(epochs, loss, 'bo', label='Training loss')
    plt.plot(epochs, val_loss, 'b', label='Validation loss')
    plt.title('Training and Validation Loss vs. Epoch #')
    plt.legend()

    plt.show()
def _read_imdb_reviews(split_dir):
    """Read every '.txt' review under split_dir/neg and split_dir/pos.

    Returns (texts, labels): negative reviews first (label 0), then positive reviews (label 1), with the
    files of each class read in sorted name order.
    """
    texts = []
    labels = []
    for label, label_type in enumerate(('neg', 'pos')):
        dir_name = os.path.join(split_dir, label_type)
        for fname in sorted(os.listdir(dir_name)):
            if fname.endswith('.txt'):
                # `with` guarantees the file is closed even if reading fails
                with open(os.path.join(dir_name, fname), encoding="utf8") as f:
                    texts.append(f.read())
                labels.append(label)
    return texts, labels


def _read_glove_vectors(path):
    """Parse a GloVe text file (one word followed by its coefficients per line) into a {word: vector} dict."""
    embeddings_idx = {}
    with open(path, encoding="utf8") as f:
        for line in f:
            values = line.split()
            embeddings_idx[values[0]] = np.asarray(values[1:], dtype='float32')
    return embeddings_idx


def pre_trained_embedding_with_imdb():
    """
    Demonstrate the use of embeddings with the IMDB dataset using a pre-trained embedding layer: the GloVe
    embedding created at Stanford.

    The function trains a small classifier on top of a frozen GloVe embedding, saves it to
    'C:\\Datasets\\IMDB\\pretrained_glove_model.h5', plots the training curves and finally evaluates the
    model on the IMDB test split.

    :return: None
    """
    # Get the directory of the IMDB dataset
    imdb_dir = 'C:\\Datasets\\IMDB\\aclImdb'
    train_dir = os.path.join(imdb_dir, 'train')

    # Extract all the text inputs, negative first then positive, together with their labels.
    texts, labels = _read_imdb_reviews(train_dir)

    # Settings for the system: cut reviews off after 100 words, train on 200 samples, validate on 10,000 samples,
    # use only the top 10,000 words
    max_len = 100
    training_samples = 200
    validation_samples = 10000
    max_words = 10000

    # Build the tokeniser
    tokeniser = Tokenizer(num_words=max_words)
    tokeniser.fit_on_texts(texts)
    sequences = tokeniser.texts_to_sequences(texts)
    word_index = tokeniser.word_index
    print(f"Found {len(word_index)} unique tokens\n")

    data = preprocessing.sequence.pad_sequences(sequences, maxlen=max_len)
    labels = np.asarray(labels)
    print(f"Shape of data tensor: {data.shape}")
    print(f"Shape of labels tensor: {labels.shape}\n")

    # Shuffle the data (the reviews were read class by class) and split it into training and validation sets
    indices = np.arange(data.shape[0])
    np.random.shuffle(indices)
    data = data[indices]
    labels = labels[indices]

    x_train = data[:training_samples]
    y_train = labels[:training_samples]
    x_val = data[training_samples:training_samples + validation_samples]
    y_val = labels[training_samples:training_samples + validation_samples]

    # Open the 100d glove embedding file and extract the embeddings into a dictionary
    glove_dir = 'C:\\Datasets\\GloVe Embeddings'
    embeddings_idx = _read_glove_vectors(
        os.path.join(glove_dir, 'glove.6B.100d.txt'))
    print(f"Found {len(embeddings_idx)} word vectors\n")

    # Now we need an Embedding matrix which must be of shape (max_words, embedding_dim), where each i contains the
    # embedding_dim dimensional vector for the word of index i in the reference word index. Note that index 0 is
    # only a placeholder.
    embedding_dim = 100
    embedding_matrix = np.zeros((max_words, embedding_dim))
    for word, i in word_index.items():
        if i < max_words:
            embedding_vector = embeddings_idx.get(word)
            if embedding_vector is not None:
                # Words not encountered in the embedding index will be given all zeros
                embedding_matrix[i] = embedding_vector

    # Now let's build a simple model and set the weights of the embedding layer using the matrix we just generated
    model = models.Sequential()
    model.add(layers.Embedding(max_words, embedding_dim, input_length=max_len))
    model.add(layers.Flatten())
    model.add(layers.Dense(32, activation='relu'))
    model.add(layers.Dense(1, activation='sigmoid'))

    # Load the GloVe weights and freeze the embedding layer to prevent forgetting the pre-learned embedding space
    model.layers[0].set_weights([embedding_matrix])
    model.layers[0].trainable = False

    # Compile and train the model
    model.compile(optimizer='rmsprop',
                  loss='binary_crossentropy',
                  metrics=['acc'])
    model.summary()
    history = model.fit(x_train,
                        y_train,
                        epochs=10,
                        batch_size=32,
                        validation_data=(x_val, y_val))

    # Save the model
    model.save('C:\\Datasets\\IMDB\\pretrained_glove_model.h5')

    # Plot the training and validation accuracy and loss
    acc = history.history['acc']
    val_acc = history.history['val_acc']
    loss = history.history['loss']
    val_loss = history.history['val_loss']

    epochs = range(1, len(acc) + 1)

    plt.plot(epochs, acc, 'bo', label='Training Acc')
    plt.plot(epochs, val_acc, 'b', label='Validation Acc')
    plt.title('Training and Validation Accuracy vs. Epochs')
    plt.legend()

    plt.figure()
    plt.plot(epochs, loss, 'bo', label='Training Loss')
    plt.plot(epochs, val_loss, 'b', label='Validation Loss')
    plt.title('Training and Validation Loss vs. Epochs')
    plt.legend()

    plt.show()

    # Now for testing purposes, lets compile the test data from the dataset and evaluate the model.
    # The tokeniser fitted on the training texts is reused so that word indices stay consistent.
    test_dir = os.path.join(imdb_dir, 'test')
    texts, labels = _read_imdb_reviews(test_dir)

    sequences = tokeniser.texts_to_sequences(texts)
    x_test = preprocessing.sequence.pad_sequences(sequences, max_len)
    y_test = np.asarray(labels)

    # Evaluate the model
    model.evaluate(x_test, y_test)
def f_define_model(config_dict, name='1'):
    '''
    Function that defines the model and compiles it.

    Arguments:
        config_dict -- nested dict; this function reads
            config_dict['model']['input_shape'],
            config_dict['training']['loss'] and
            config_dict['training']['metrics'].
        name (str) -- model selector:
            '1'..'6' : models built by f_model_prototype from a parameter dict
            '0', '30': models laid out layer by layer in this function
            '10', '11': ResNet50 / ResNet18

    Returns:
        the compiled model

    Raises:
        ValueError -- if name is not one of the selectors listed above
    '''
    ### Extract info from the config_dict
    shape = config_dict['model']['input_shape']
    loss_fn = config_dict['training']['loss']
    metrics = config_dict['training']['metrics']

    resnet = False  ### Whether the model is a resnet. This is needed for specifying the loss function.
    custom_model = False  ### Whether the model is built layer by layer here (not using the prototype function).

    ### Parameter sets for the models built by the prototype function, keyed by model name
    prototype_params = {
        # Simple layered, with inner dropout
        '1': {
            'conv_size_list': [10, 10, 10],
            'kernel_size': (3, 3),
            'pool_size': (2, 2),
            'strides': 1,
            'no_pool': False,
            'learn_rate': 0.001,
            'outer_dropout': 0.5,
            'inner_dropout': 0.1,
            'dense_size': 64,
            'final_activation': 'sigmoid',
            'double_conv': False,
        },
        # Simple layered, without inner dropout
        '2': {
            'conv_size_list': [10, 10, 10],
            'kernel_size': (3, 3),
            'pool_size': (2, 2),
            'strides': 1,
            'no_pool': False,
            'learn_rate': 0.001,
            'outer_dropout': 0.5,
            'inner_dropout': None,
            'dense_size': 64,
            'final_activation': 'sigmoid',
            'double_conv': False,
        },
        # More layers
        '3': {
            'conv_size_list': [64, 64, 64, 64],
            'kernel_size': (3, 3),
            'pool_size': (2, 2),
            'strides': 1,
            'no_pool': False,
            'learn_rate': 0.001,
            'outer_dropout': 0.5,
            'inner_dropout': 0.5,
            'dense_size': 64,
            'final_activation': 'sigmoid',
            'double_conv': True,
        },
        ### Strides instead of pools
        # Striding
        '4': {
            'conv_size_list': [128, 128, 64, 64],
            'kernel_size': (3, 3),
            'pool_size': (2, 2),
            'strides': [1, 2, 3, 4],
            'no_pool': True,
            'learn_rate': 0.001,
            'outer_dropout': 0.2,
            'inner_dropout': 0.5,
            'dense_size': 64,
            'final_activation': 'sigmoid',
            'double_conv': True,
        },
        # Striding, without inner dropout
        '5': {
            'conv_size_list': [128, 128, 64, 64],
            'kernel_size': (3, 3),
            'pool_size': (2, 2),
            'strides': [1, 2, 3, 4],
            'no_pool': True,
            'learn_rate': 0.001,
            'outer_dropout': 0.2,
            'inner_dropout': None,
            'dense_size': 64,
            'final_activation': 'sigmoid',
            'double_conv': True,
        },
        # Striding, without inner dropout, fewer filters
        '6': {
            'conv_size_list': [64, 64, 32, 32],
            'kernel_size': (3, 3),
            'pool_size': (2, 2),
            'strides': [1, 2, 3, 4],
            'no_pool': True,
            'learn_rate': 0.001,
            'outer_dropout': 0.2,
            'inner_dropout': None,
            'dense_size': 64,
            'final_activation': 'sigmoid',
            'double_conv': True,
        },
    }

    # Choose model
    if name in prototype_params:
        model_par_dict = prototype_params[name]

    elif name == '0':  # Model used in ATLAS paper
        custom_model = True
        learn_rate = 0.001

        inputs = layers.Input(shape=shape)
        h = inputs
        # Convolutional layers; the stride-2 convolutions do the downsampling
        h = layers.Conv2D(64, kernel_size=(3, 3), activation='relu', strides=1, padding='same')(h)
        h = layers.Conv2D(128, kernel_size=(3, 3), activation='relu', strides=2, padding='same')(h)
        h = layers.Conv2D(256, kernel_size=(3, 3), activation='relu', strides=1, padding='same')(h)
        h = layers.Conv2D(256, kernel_size=(3, 3), activation='relu', strides=2, padding='same')(h)
        h = layers.Flatten()(h)
        h = layers.Dense(512, activation='relu')(h)
        # Output layer
        outputs = layers.Dense(1, activation='sigmoid')(h)

    elif name == '10':  # Resnet 50
        inputs = layers.Input(shape=shape)
        model = ResNet50(img_input=inputs)
        learn_rate = 0.0005
        resnet = True

    elif name == '11':  # Resnet 18
        inputs = layers.Input(shape=shape)
        model = ResNet18(img_input=inputs)
        learn_rate = 0.0005
        resnet = True

    elif name == '30':  # Custom layered cnn
        custom_model = True
        learn_rate = 0.001

        inputs = layers.Input(shape=shape)
        h = inputs

        # Convolutional layers: for each size, four convolutions followed by a max-pool
        conv_sizes = [128, 128, 256]
        conv_args = dict(kernel_size=(4, 4), activation='relu', padding='same')
        for conv_size in conv_sizes:
            h = layers.Conv2D(conv_size, **conv_args)(h)
            h = layers.Conv2D(conv_size, **conv_args)(h)
            h = layers.Conv2D(conv_size, **conv_args)(h)
            h = layers.Conv2D(conv_size, **conv_args)(h)
            h = layers.MaxPooling2D(pool_size=(2, 2))(h)
            #h = layers.Dropout(rate=0.5)(h)
        h = layers.Flatten()(h)

        # Fully connected layers
        h = layers.Dense(64, activation='relu')(h)
        h = layers.Dropout(rate=0.5)(h)

        # Output layer
        outputs = layers.Dense(1, activation='sigmoid')(h)

    ############################################
    ### Add more models above
    ############################################
    else:
        # Fail with a clear message instead of an unbound-variable error further down
        raise ValueError(f"Unknown model name {name!r}")

    ####### Compile model ######################
    ############################################
    # `learning_rate` is the supported spelling of the optimizer argument; the old `lr` alias is deprecated.
    if resnet:
        print("resnet model name", name)
        # the resnet models always use this loss, overriding the one from config_dict
        opt, loss_fn = optimizers.Adam(
            learning_rate=learn_rate), 'sparse_categorical_crossentropy'
    else:  ## For non resnet models
        if not custom_model:  ### For non-custom models, use prototype function
            outputs, inputs = f_model_prototype(shape, **model_par_dict)
            learn_rate = model_par_dict['learn_rate']
        model = models.Model(inputs, outputs)
        opt = optimizers.Adam(learning_rate=learn_rate)

    model.compile(optimizer=opt, loss=loss_fn, metrics=metrics)
    #print("model %s"%name)

    return model
def temperature_forecasting_example():
    """
    Forecast the temperature 24 hours ahead from the Jena climate dataset.

    The function loads the Jena CSV (one sample every 10 minutes), plots the temperature series, normalises the
    data with the statistics of the first 200,000 samples and then:

    1. evaluates a naive "the temperature in 24h equals the temperature now" baseline,
    2. trains a small densely connected baseline network,
    3. trains a GRU network,
    4. builds (without training) three further recurrent topologies: a GRU with dropout, stacked GRUs and a
       bi-directional GRU.

    :return: None
    """
    # First let's set up the path to the data
    data_dir = 'C:\\Datasets\\jena_climate'
    fname = os.path.join(data_dir, 'jena_climate_2009_2016.csv')

    # Read the whole file; the context manager closes it even if reading fails.
    with open(fname) as f:
        data_from_file = f.read()

    rows_of_text = data_from_file.split('\n')
    header = rows_of_text[0].split(',')
    # Skip blank lines (e.g. a trailing newline) which would otherwise break the float conversion below.
    lines = [line for line in rows_of_text[1:] if line.strip()]
    print("Column headers in the dataset: ")
    for name in header:
        print(name)
    print(f"\nThe data is of shape: ({len(lines)}, {len(header)})")

    # Now let's convert every line of data into a Numpy array (the first column, the timestamp, is dropped).
    # For this dataset measurements were taken every 10 minutes.
    float_data = np.zeros((len(lines), len(header) - 1))
    for i, line in enumerate(lines):
        float_data[i, :] = [float(x) for x in line.split(',')[1:]]

    # Column 1 of the numeric data is the temperature; it is also the prediction target below.
    temp = float_data[:, 1]
    plt.plot(range(len(temp)), temp)
    plt.title('Temperature Measurements across Time')
    plt.xlabel('Sample #')
    plt.ylabel('Temp (deg C)')
    plt.show()

    plt.plot(range(1440), temp[:1440])
    plt.title('Temperature Measurements across 1st 10 Days')
    plt.xlabel('Sample #')
    plt.ylabel('Temp (deg C)')
    plt.show()

    # Now let's prepare the data for presentation to a Neural Network. We'll use the first 200,000 samples for
    # training, so only those samples contribute to the normalisation statistics.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=RuntimeWarning)
        mean = np.mean(float_data[:200000], axis=0)
        float_data -= mean
        std = np.std(float_data[:200000], axis=0)
        float_data /= std

    look_back = 1440   # observations go back 10 days (1440 samples of 10 minutes)
    step_size = 6      # sample the look-back window once per hour
    delay_size = 144   # the target lies 24 hours in the future
    batch = 128

    # Now we'll make a generator that takes the current array of float data and yields batches of data from the
    # recent past, along with a target temperature in the future. Because the dataset is largely redundant (sample N
    # and sample N+1 will have most of their timestamps in common), it would be wasteful to explicitly allocate
    # every sample
    def generator(data_input, lookback: int, delay: int, min_index: int, max_index: int = None,
                  shuffle: bool = False, batch_size: int = 128, step: int = 6):
        """
        Yield ``(samples, targets)`` batches forever.

        ``samples`` has shape ``(rows, lookback // step, features)`` and ``targets`` holds column 1 of
        ``data_input`` taken ``delay`` rows after each selected row.
        """
        if max_index is None:
            max_index = len(data_input) - delay - 1
        if min_index >= max_index:
            raise ValueError("min_index must be smaller than max_index")
        idx = min_index + lookback
        while True:
            if shuffle:
                rows = np.random.randint(min_index, max_index, size=batch_size)
            else:
                # Wrap around once the remaining rows cannot fill a batch.
                if idx + batch_size >= max_index:
                    idx = min_index + lookback
                # The original indexed with a stale outer loop variable here, which produced empty batches.
                rows = np.arange(idx, min(idx + batch_size, max_index))
                idx += len(rows)
            samples = np.zeros((len(rows), lookback // step, data_input.shape[-1]))
            targets = np.zeros((len(rows),))
            for idx2, row in enumerate(rows):
                # Rows closer than `lookback` to the start of the data reuse the very first window.
                slice_begin = max(0, row - lookback)
                slice_end = lookback if slice_begin == 0 else row
                samples[idx2] = data_input[slice_begin:slice_end:step]
                targets[idx2] = data_input[row + delay][1]
            yield samples, targets

    train_gen = generator(float_data, look_back, delay_size, min_index=0, max_index=200000, shuffle=True,
                          step=step_size, batch_size=batch)
    val_gen = generator(float_data, look_back, delay_size, min_index=200001, max_index=300000, shuffle=False,
                        step=step_size, batch_size=batch)
    test_gen = generator(float_data, look_back, delay_size, min_index=300001, max_index=None, shuffle=False,
                         step=step_size, batch_size=batch)

    # Number of *batches* needed to see each split once (hence the division by the batch size).
    val_steps = (300000 - 200001 - look_back) // batch
    test_steps = (len(float_data) - 300001 - look_back) // batch

    # For the sake of comparison it's often quite valuable to create a deterministic baseline against which to
    # compare the ML model. In this case of predicting temperature, we can assume that the temperature tomorrow
    # would be very similar to the temperature today, so using the Mean Absolute Error (MAE) metric we'd expect the
    # ML model to have a lower MAE than a model which simply states that the temperature tomorrow is the same as the
    # temperature today.
    def evaluation_naive_method():
        batch_maes = []
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=RuntimeWarning)
            for _ in range(val_steps):
                samples, targets = next(val_gen)
                # Last time step of the window, temperature column (index 1, same column as the targets).
                preds = samples[:, -1, 1]
                mae = np.nanmean(np.abs(preds - targets))
                batch_maes.append(mae)
        print(np.mean(batch_maes))

    evaluation_naive_method()

    # In the same way that using a non-ML baseline is useful, it's also quite useful to attempt a simple network
    # first to establish an ML baseline. This will mean that any further complexity thrown at the problem will be
    # justified.
    model = models.Sequential()
    model.add(layers.Flatten(input_shape=(look_back // step_size, float_data.shape[-1])))
    model.add(layers.Dense(32, activation='relu'))
    model.add(layers.Dense(1))
    model.compile(optimizer=RMSprop(), loss='mae')
    history = model.fit(train_gen, steps_per_epoch=500, epochs=20, validation_data=val_gen,
                        validation_steps=500)

    loss = history.history['loss']
    val_loss = history.history['val_loss']
    epochs = range(1, len(loss) + 1)
    plt.plot(epochs, loss, 'bo', label='Training Loss')
    plt.plot(epochs, val_loss, 'b', label='Validation Loss')
    plt.title('Training and Validation Loss vs. Epochs')
    plt.legend()
    plt.show()

    # Now let's try a Recurrent network. Rather than an LSTM, let's try a Gated Recurrent Unit (GRU), which work
    # using the same principals as LSTM's but are somewhat streamlined and thus cheaper to run.
    model = models.Sequential()
    model.add(layers.GRU(32, input_shape=(None, float_data.shape[-1])))
    model.add(layers.Dense(1))
    model.compile(optimizer=RMSprop(), loss='mae')
    history = model.fit(train_gen, steps_per_epoch=500, epochs=20, validation_data=val_gen,
                        validation_steps=val_steps)

    loss = history.history['loss']
    val_loss = history.history['val_loss']
    epochs = range(1, len(loss) + 1)
    plt.plot(epochs, loss, 'bo', label='Training Loss')
    plt.plot(epochs, val_loss, 'b', label='Validation Loss')
    plt.title('Training and Validation Loss vs. Epochs')
    plt.legend()
    plt.show()

    # The remaining network topographies that can be used for this problem are only built (not trained) below,
    # with a few words on why they work.

    # We're already familiar with the idea of dropout for deep neural networks. However, applying a random dropout
    # mask to the recurrent branch of the network will greatly disrupt the signal on the feedback loop and hinder
    # training. The correct approach is to apply a temporally constant dropout mask to the feedback loop, allowing
    # the network to train with the presence of the error signal and avoid overfitting. Hence there are 2 dropout
    # values: one for the input and one for the feedback loop.
    model = models.Sequential()
    model.add(layers.GRU(32, dropout=0.2, recurrent_dropout=0.2, input_shape=(None, float_data.shape[-1])))
    model.add(layers.Dense(1))
    model.compile(optimizer=RMSprop(), loss='mae')

    # Depending on the overfitting performance of the previous designs, the next tactic is to increase the capacity
    # of the network, achieved by adding more units to layers and more layers to the network. Note that when
    # stacking recurrent layers you must ensure that intermediate layers return their entire sequence output, rather
    # than just the last output
    model = models.Sequential()
    model.add(layers.GRU(32, dropout=0.2, recurrent_dropout=0.2, return_sequences=True,
                         input_shape=(None, float_data.shape[-1])))
    model.add(layers.GRU(64, dropout=0.1, recurrent_dropout=0.5))
    model.add(layers.Dense(1))
    model.compile(optimizer=RMSprop(), loss='mae')

    # Now we could try increasing the complexity of the network design. Here we'll attempt the use of a
    # bi-directional RNN. This layout (having 2 RNN's working together, one processing the data in chronological
    # order and one in antichronological order) works incredibly well on time-sensitive or order-sensitive data, and
    # as such they are the go-to for natural language processing problems. By viewing the input sequence both ways
    # the system can learn to detect patterns that may go overlooked in unidirectional processing. However they do
    # run into problems on sequences data where the recent past is much more informative than the beginning of the
    # sequence.
    model = models.Sequential()
    model.add(layers.Bidirectional(layers.GRU(32), input_shape=(None, float_data.shape[-1])))
    model.add(layers.Dense(1))
    model.compile(optimizer=RMSprop(), loss='mae')
def create_q_model():
    """
    Build an image classifier for 32x32x3 inputs whose topology is chosen by the module-level ``condition``.

    * ``condition == 0``: one VGG block followed by a 128-unit dense layer.
    * ``condition == 1``: two 5x5 convolutions (4 and 8 filters), each followed by average pooling.
    * ``condition == 2``: two VGG blocks feeding the classifier directly.
    * anything else: three VGG blocks followed by a 128-unit dense layer.

    A "VGG block" here is two 3x3 'same' convolutions (ReLU, he_uniform, L2 0.01) and a 2x2 max pooling.
    Every variant ends with a softmax layer of ``n_outputs`` units (also a module-level name).

    :return: an uncompiled ``keras.Model``
    """
    def conv3x3(tensor, filters):
        # Each convolution gets its own regulariser instance, exactly as in a hand-unrolled definition.
        return layers.Conv2D(filters,
                             kernel_size=(3, 3),
                             activation="relu",
                             kernel_initializer='he_uniform',
                             padding='same',
                             kernel_regularizer=tf.keras.regularizers.l2(0.01))(tensor)

    inputs = layers.Input(shape=(32, 32, 3))

    # Pick the layout: filter widths of the VGG blocks (None selects the small 5x5 network) and whether a
    # hidden dense layer precedes the classifier.
    if condition == 0:
        block_widths, hidden_dense = (32,), True
    elif condition == 1:
        block_widths, hidden_dense = None, False
    elif condition == 2:
        block_widths, hidden_dense = (32, 64), False
    else:
        block_widths, hidden_dense = (32, 64, 128), True

    if block_widths is None:
        features = layers.Conv2D(4, kernel_size=(5, 5), activation="relu")(inputs)
        features = layers.AveragePooling2D(pool_size=(2, 2))(features)
        features = layers.Conv2D(8, kernel_size=(5, 5), activation="relu")(features)
        features = layers.AveragePooling2D(pool_size=(2, 2))(features)
    else:
        features = inputs
        for width in block_widths:
            features = conv3x3(features, width)
            features = conv3x3(features, width)
            features = layers.MaxPooling2D(pool_size=(2, 2))(features)

    features = layers.Flatten()(features)
    if hidden_dense:
        features = layers.Dense(128, activation="relu", kernel_initializer='he_uniform')(features)
    classification = layers.Dense(n_outputs, activation="softmax")(features)

    return keras.Model(inputs=inputs, outputs=classification)
def do_train_flower():
    """
    Train a small CNN on the flower-photos dataset and classify one sample image.

    The images are read from ``/root/.keras/datasets/flower_photos/`` (one sub-directory per class), split
    80/20 into training and validation sets, and fed to a three-convolution network with on-the-fly data
    augmentation. After training, accuracy/loss curves are plotted and a sunflower picture is downloaded and
    classified.

    :return: None
    """
    import matplotlib.pyplot as plt
    import numpy as np
    import tensorflow as tf
    from tensorflow import keras
    from tensorflow.keras import layers
    from tensorflow.keras.models import Sequential
    import pathlib

    data_dir = pathlib.Path('/root/.keras/datasets/flower_photos/')

    batch_size = 32
    img_height = 180
    img_width = 180

    # The same seed on both calls keeps the training and validation subsets disjoint.
    train_ds = tf.keras.preprocessing.image_dataset_from_directory(
        data_dir,
        validation_split=0.2,
        subset="training",
        seed=123,
        image_size=(img_height, img_width),
        batch_size=batch_size)
    val_ds = tf.keras.preprocessing.image_dataset_from_directory(
        data_dir,
        validation_split=0.2,
        subset="validation",
        seed=123,
        image_size=(img_height, img_width),
        batch_size=batch_size)

    class_names = train_ds.class_names
    print(class_names)

    # Show the shape of one batch of images and labels.
    for image_batch, labels_batch in train_ds:
        print(image_batch.shape)
        print(labels_batch.shape)
        break

    AUTOTUNE = tf.data.AUTOTUNE
    train_ds = train_ds.cache().shuffle(1000).prefetch(buffer_size=AUTOTUNE)
    val_ds = val_ds.cache().prefetch(buffer_size=AUTOTUNE)

    # Demonstration only: rescale one batch and print its value range. The model below does its own rescaling.
    normalization_layer = layers.experimental.preprocessing.Rescaling(1. / 255)
    normalized_ds = train_ds.map(lambda x, y: (normalization_layer(x), y))
    image_batch, labels_batch = next(iter(normalized_ds))
    first_image = image_batch[0]
    # Notice the pixels values are now in `[0,1]`.
    print(np.min(first_image), np.max(first_image))

    data_augmentation = keras.Sequential([
        layers.experimental.preprocessing.RandomFlip("horizontal",
                                                     input_shape=(img_height, img_width, 3)),
        layers.experimental.preprocessing.RandomRotation(0.1),
        layers.experimental.preprocessing.RandomZoom(0.1),
    ])

    # One output unit per class directory found on disk, so the head always matches the integer labels
    # produced by image_dataset_from_directory (5 for the standard flower dataset).
    num_classes = len(class_names)

    model = Sequential([
        data_augmentation,
        layers.experimental.preprocessing.Rescaling(1. / 255),
        layers.Conv2D(16, 3, padding='same', activation='relu'),
        layers.MaxPooling2D(),
        layers.Conv2D(32, 3, padding='same', activation='relu'),
        layers.MaxPooling2D(),
        layers.Conv2D(64, 3, padding='same', activation='relu'),
        layers.MaxPooling2D(),
        layers.Dropout(0.2),
        layers.Flatten(),
        layers.Dense(128, activation='relu'),
        layers.Dense(num_classes),  # raw logits, matching from_logits=True below
    ])

    model.compile(optimizer='adam',
                  loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                  metrics=['accuracy'])
    model.summary()

    epochs = 5
    history = model.fit(train_ds, validation_data=val_ds, epochs=epochs)

    acc = history.history['accuracy']
    val_acc = history.history['val_accuracy']
    loss = history.history['loss']
    val_loss = history.history['val_loss']

    epochs_range = range(epochs)

    plt.figure(figsize=(8, 8))
    plt.subplot(1, 2, 1)
    plt.plot(epochs_range, acc, label='Training Accuracy')
    plt.plot(epochs_range, val_acc, label='Validation Accuracy')
    plt.legend(loc='lower right')
    plt.title('Training and Validation Accuracy')

    plt.subplot(1, 2, 2)
    plt.plot(epochs_range, loss, label='Training Loss')
    plt.plot(epochs_range, val_loss, label='Validation Loss')
    plt.legend(loc='upper right')
    plt.title('Training and Validation Loss')
    plt.show()

    sunflower_url = "https://storage.googleapis.com/download.tensorflow.org/example_images/592px-Red_sunflower.jpg"
    sunflower_path = tf.keras.utils.get_file('Red_sunflower', origin=sunflower_url)

    img = keras.preprocessing.image.load_img(sunflower_path, target_size=(img_height, img_width))
    img_array = keras.preprocessing.image.img_to_array(img)
    img_array = tf.expand_dims(img_array, 0)  # Create a batch

    predictions = model.predict(img_array)
    # The model emits logits, so convert them to probabilities before reporting a confidence.
    score = tf.nn.softmax(predictions[0])

    print(
        "This image most likely belongs to {} with a {:.2f} percent confidence."
        .format(class_names[np.argmax(score)], 100 * np.max(score))
    )
def train_cifar_9_layer_cnn(train_data,
                            test_data,
                            path,
                            overwrite=False,
                            use_relu=False,
                            optimizer_config=None,
                            epochs=20):
    """
    Train (or reuse) a small CNN for 32x32x3 images with 10 classes.

    If a saved model already exists at ``path`` and ``overwrite`` is False nothing is trained. Otherwise a new
    model is built, trained with the test set as validation data, evaluated, saved to ``path`` and its accuracy
    curves are plotted (without calling ``plt.show()``).

    :param train_data: tuple ``(train_images, train_labels)``
    :param test_data: tuple ``(test_images, test_labels)``; also used as validation data during training
    :param path: location of the saved model (directory or single file)
    :param overwrite: retrain and replace an existing saved model
    :param use_relu: use ReLU instead of sigmoid in the two hidden dense layers
    :param optimizer_config: optimizer passed to ``model.compile``; ``None`` creates a fresh Adam(0.001) per call
    :param epochs: number of training epochs
    :return: None
    """
    (train_images, train_labels) = train_data
    (test_images, test_labels) = test_data

    model_exists = os.path.exists(path)
    if model_exists and not overwrite:
        print("Model found, there is no need to re-train the model ...")
        return

    if model_exists:
        # A saved model can be a directory or a single file; rmtree alone fails on files.
        if os.path.isdir(path):
            shutil.rmtree(path)
        else:
            os.remove(path)
        print("TRAIN ANYWAY option enabled, create and train a new one ...")
    else:
        print(path, " - model not found, create and train a new one ...")

    # Create the optimizer per call: an instance built once as a default argument would be shared between
    # every model trained through this function.
    if optimizer_config is None:
        optimizer_config = tf.keras.optimizers.Adam(learning_rate=0.001)

    dense_activation = 'relu' if use_relu else 'sigmoid'

    model = models.Sequential()
    # In the first layer, please provide the input shape (32,32,3)
    model.add(layers.Conv2D(32, (3, 3), activation='relu',
                            input_shape=(32, 32, 3)))              # 32 feature maps of 30*30
    model.add(layers.MaxPooling2D((2, 2)))                         # 32 feature maps of 15*15
    model.add(layers.Conv2D(64, (3, 3), activation='relu'))        # 64 feature maps of 13*13
    model.add(layers.MaxPooling2D((2, 2)))                         # 64 feature maps of 6*6
    # NOTE(review): this convolution uses sigmoid regardless of `use_relu` - confirm that is intended.
    model.add(layers.Conv2D(64, (3, 3), activation='sigmoid'))     # 64 feature maps of 4*4
    model.add(layers.Flatten())                                    # vector of length 4*4*64 = 1024
    model.add(layers.Dense(128, activation=dense_activation))      # 128 outputs
    model.add(layers.Dense(64, activation=dense_activation))       # 64 outputs
    model.add(layers.Dense(10, activation='softmax'))              # 10 class probabilities
    model.summary()

    # The last layer already applies softmax, so the loss must treat the outputs as probabilities.
    model.compile(optimizer=optimizer_config,
                  loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
                  metrics=['accuracy'])
    training_history = model.fit(train_images,
                                 train_labels,
                                 epochs=epochs,
                                 validation_data=(test_images, test_labels))
    test_loss, test_accuracy = model.evaluate(test_images, test_labels, verbose=2)
    print("Final Accuracy achieved is: ", test_accuracy)
    model.save(path)
    print("Model has been saved")

    plt.plot(training_history.history['accuracy'], label="Accuracy")
    plt.plot(training_history.history['val_accuracy'], label='val_accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.ylim([0.4, 1])
    plt.legend(loc='lower right')
    #plt.show()
probability value for each action in its action space. 2. Critic: This takes as input the state of our environment and returns an estimate of total rewards in the future. In our implementation, they share the initial layer. """ state_size = env.state_size num_actions = len(env.action_space) num_hidden = 128 input_x = layers.Input(shape=state_size, name='input_x') conv1 = layers.Conv3D(1, kernel_size=(1, 1, 1), activation='relu', kernel_initializer='he_uniform', padding='valid')(input_x) flatten = layers.Flatten()(conv1) common = layers.Dense(num_hidden, activation="relu")(flatten) action = layers.Dense(num_actions, activation="softmax")(common) critic = layers.Dense(1, activation='linear')(common) model = keras.Model(inputs=input_x, outputs=[action, critic]) """ ## Train """ optimizer = keras.optimizers.Adam(learning_rate=0.00001) huber_loss = keras.losses.Huber() action_probs_history = [] critic_value_history = [] rewards_history = [] running_reward = 0
def train_mnist_5_layer_mlp(train_data,
                            test_data,
                            path,
                            overwrite=False,
                            use_relu=True,
                            optimizer_config=None,
                            epochs=20):
    """
    Train (or reuse) a multi-layer perceptron for 28x28x1 images with 10 classes.

    If a saved model already exists at ``path`` and ``overwrite`` is False nothing is trained. Otherwise the
    training data is split 83.3% / 16.7% into training and validation sets, a new model is trained, evaluated
    on the test set, saved to ``path`` and its accuracy curves are plotted (without calling ``plt.show()``).

    :param train_data: tuple ``(images, labels)`` that is split into training and validation parts
    :param test_data: tuple ``(test_images, test_labels)`` used for the final evaluation
    :param path: location of the saved model (directory or single file)
    :param overwrite: retrain and replace an existing saved model
    :param use_relu: use ReLU (True) or sigmoid (False) in the hidden layers
    :param optimizer_config: optimizer passed to ``model.compile``; ``None`` creates a fresh Adam(0.001) per call
    :param epochs: number of training epochs
    :return: None
    """
    if use_relu:
        print(" >> ACTIVATION BY ReLU ...")
    else:
        print(" >> ACTIVATION BY Sigmoid ...")

    (x, y) = train_data
    (test_images, test_labels) = test_data

    model_exists = os.path.exists(path)
    if model_exists and not overwrite:
        print("Model found, there is no need to re-train the model ...")
        return

    if model_exists:
        # A saved model can be a directory or a single file; rmtree alone fails on files.
        if os.path.isdir(path):
            shutil.rmtree(path)
        else:
            os.remove(path)
        print("TRAIN ANYWAY option enabled, create and train a new one ...")
    else:
        print("Model not found, create and train a new one ...")

    # Only split the data when a model is actually going to be trained.
    train_images, val_images, train_labels, val_labels = train_test_split(
        x, y, test_size=0.167, train_size=0.833)

    # Create the optimizer per call: an instance built once as a default argument would be shared between
    # every model trained through this function.
    if optimizer_config is None:
        optimizer_config = tf.keras.optimizers.Adam(learning_rate=0.001)

    hidden_activation = 'relu' if use_relu else 'sigmoid'

    model = models.Sequential()
    model.add(layers.Flatten(input_shape=(28, 28, 1)))
    model.add(layers.Dense(128, activation=hidden_activation))
    model.add(layers.Dense(128, activation=hidden_activation))
    model.add(layers.Dense(64, activation=hidden_activation))
    model.add(layers.Dense(10, activation='softmax'))
    model.summary()

    # The last layer already applies softmax, so the loss must treat the outputs as probabilities.
    model.compile(optimizer=optimizer_config,
                  loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
                  metrics=['accuracy'])
    training_history = model.fit(train_images,
                                 train_labels,
                                 epochs=epochs,
                                 validation_data=(val_images, val_labels))
    test_loss, test_accuracy = model.evaluate(test_images, test_labels, verbose=2)
    print("Final Accuracy achieved is: ", test_accuracy)
    model.save(path)
    print("Model has been saved")

    plt.plot(training_history.history['accuracy'], label="Accuracy")
    plt.plot(training_history.history['val_accuracy'], label='val_accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.ylim([0.8, 1])
    plt.legend(loc='lower right')
    #plt.show()
def load_model(model_signature: str, **params) -> object:
    """
    Load or construct a model according to ``model_signature``.

    NOTE: common Keras pitfall - TF and Theano convolution kernels have the same shape, so Keras cannot tell
    them apart; validate the behaviour of a loaded model with test samples.

    :param model_signature: one of the ``_ModelSignature.*.signature`` values handled below
    :param params: signature-specific options read below, e.g. ``path``, ``tags``, ``signature_``, ``format``,
        ``name``, ``model_func``, ``model_args``, ``weights_path``, ``input_shape``, ``trainable``
    :return: the loaded or newly constructed model object
    :raises ValueError: for an unsupported signature, an undefined Sequential ``name`` or a missing/invalid
        required parameter
    """
    model = None
    inputs, outputs = {}, {}  # {name: shape} dicts

    if model_signature == _ModelSignature.TFSavedModel.signature:
        import tensorflow as tf  # IMPROVE: check availability of ml backends
        # format_ = ModelManager._validate_format(params['format'], _ModelSignature.TFSavedModel)
        path = ModelManager._validate_path(params.get('path', None))
        model = tf.saved_model.load(path, params.get('tags', None))  # == core ==
        if params.get('signature_', None) is not None:
            model = model.signatures[params['signature_']]
        # TODO: append inputs, outputs spec to model object? so that predict() can adapt the fed inputs
        # The attribute name was previously misspelled, so this branch could never run.
        if hasattr(model, 'inputs') and hasattr(model, 'structured_outputs'):
            if model.inputs:
                inputs = {model.inputs[0].name: model.inputs[0].shape}
            structured = model.structured_outputs
            if isinstance(structured, dict) and 'default' in structured:
                outputs = {'default': structured['default']}  # IMPROVE: iterate
    elif model_signature == _ModelSignature.TLDyamicModel.signature:
        params_model = Params(model_func=None, model_args=None, weights_path=None).update_to(params)
        import tensorflow as tf
        if params_model.model_func is None:
            raise ValueError(f"model_func must be specified for a '{model_signature}' model")
        # `model_func` is a dotted path "package.module.function".
        module_name, _, func_name = params_model.model_func.rpartition('.')
        if not module_name:
            raise ValueError(f"model_func must be a dotted 'module.function' path, got '{params_model.model_func}'")
        module = safe_import_module(module_name)
        model_func = getattr(module, func_name)
        # `model_args` defaults to None, which cannot be unpacked with **.
        model = model_func(**(params_model.model_args or {}))
        if params_model.weights_path is not None:
            weights_path = ModelManager._validate_path(params_model.weights_path)
            model.load_weights(weights_path, skip=True)
    elif model_signature == _ModelSignature.TFHub_KerasLayer.signature:
        import tensorflow_hub as tf_hub
        # format_ = ModelManager._validate_format(params['format'], _ModelSignature.TFSavedModel)
        path = ModelManager._validate_path(params.get('path', None))
        params_model = Params(input_shape=None, trainable=False).update_to(params)
        # A 4-element shape includes the batch dimension, which is dropped here. `None` is allowed.
        if params_model.input_shape is not None and len(params_model.input_shape) == 4:
            params_model.input_shape = params_model.input_shape[1:]
        # NOTE: it will be delayed-build pattern when `input_shape` is None. no weights info available until build.
        model = tf_hub.KerasLayer(path, input_shape=params_model.input_shape)
        model.trainable = params_model.trainable
    elif model_signature == _ModelSignature.KerasSequential.signature:
        # IMPROVE: check availability of ml backends
        from tensorflow.keras import Sequential, layers
        name = params['name']
        # IMPROVE: parse name -> layers, or use structural config for iteration
        if name == '{conv-pool}*2-flat-dense-drop-dense':
            # NOTE: only for _test_\TF_1x_to_2x_3, output is len=10 logits
            model = Sequential([
                # NOTE: 1. TF2.x no longer needs the Input layer dimensions; layers connect automatically.
                #       2. A Conv layer does not need the previous layer's (h, w); only the filter count, kernel
                #          size, padding (to keep h, w) etc. have to be set.
                #       3. Without input_shape, however, the optimizer cannot load its previously saved
                #          parameters and has to be re-initialised.
                layers.Conv2D(32, (5, 5), strides=(1, 1), padding='same', activation='relu'),
                layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'),
                layers.Conv2D(64, (5, 5), strides=(1, 1), padding='same', activation='relu'),
                layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'),
                layers.Flatten(),  # the dense layers below need 1-D data
                layers.Dense(1024, activation='relu'),
                layers.Dropout(0.5),  # TODO: disable Dropout @evaluate, predict
                layers.Dense(10, activation='softmax')
            ])
        elif name == 'dense-dense_softmax':
            params_model = Params(embedding_size=1024, class_count=None).update_to(params)
            if params_model.class_count is None:
                raise ValueError('class_count must be specified')
            model = Sequential([
                layers.Dense(params_model.embedding_size, activation='relu'),
                layers.Dense(params_model.class_count, activation='softmax')
            ])
            # TODO: need to return intermediate tf.Tensor required by embedding, loss calculation and evaluation.
        else:
            raise ValueError(f"Undefined model: {name}")
    elif model_signature == _ModelSignature.KerasModels_LoadModel.signature:
        import tensorflow as tf  # IMPROVE: check availability of ml backends
        # Called for its validation; the returned format is not used further.
        ModelManager._validate_format(params['format'], _ModelSignature.KerasModels_LoadModel)
        params_model = Params(path='', path_formatted='').update_to(params)
        path = ModelManager._validate_path(params_model.path)
        model = tf.keras.models.load_model(path)  # == core ==
    elif model_signature == _ModelSignature.TF_ImportGraphDef.signature:
        import tensorflow as tf  # IMPROVE: check availability of ml backends
        # Called for its validation; the returned format is not used further.
        ModelManager._validate_format(params['format'], _ModelSignature.TF_ImportGraphDef)
        params_model = Params(inputs='', outputs='').update_to(params)
        # NOTE(review): `path` is not among the Params defaults above; presumably update_to() copies it
        # from `params` - confirm.
        path = ModelManager._validate_path(params_model.path)

        # import PB model (frozen) in TF2.x. ref:https://www.tensorflow.org/guide/migrate#a_graphpb_or_graphpbtxt
        # ref:https://www.tensorflow.org/api_docs/python/tf/compat/v1/wrap_function
        def wrap_frozen_graph(pb_path, inputs, outputs, prefix=""):
            def _imports_graph_def():
                tf.compat.v1.import_graph_def(graph_def, name=prefix)  # turn off the default prefix "import/"

            graph_def = tf.compat.v1.GraphDef()
            # Close the file deterministically instead of leaking the handle.
            with open(pb_path, 'rb') as pb_file:
                graph_def.ParseFromString(pb_file.read())  # == core ==
            wrapped_import = tf.compat.v1.wrap_function(_imports_graph_def, [])  # == core ==
            import_graph = wrapped_import.graph
            return wrapped_import.prune(
                tf.nest.map_structure(import_graph.as_graph_element, inputs),
                tf.nest.map_structure(import_graph.as_graph_element, outputs))

        model = wrap_frozen_graph(path, inputs=params_model.inputs, outputs=params_model.outputs)
        test_img = tf.ones([1, 224, 224, 3], dtype=tf.float32)  # fixed shape is for test ONLY
        DEBUG(f"wrap_func test result: {model(test_img).shape}")
    else:
        raise ValueError(f"Unsupported model signature: {model_signature}")

    INFO(f"type of loaded model={type(model)}")
    INFO(f" inputs={inputs}, outputs={outputs}")
    return model
def train_pneumonia_binary_classification_cnn(train_data,
                                              test_data,
                                              path,
                                              overwrite=False,
                                              epochs=20,
                                              val_data=None,
                                              data_augmentation=False,
                                              img_size=64):
    """
    Train (or reuse) a binary CNN classifier for ``img_size`` x ``img_size`` single-channel images.

    If a saved model already exists at ``path`` and ``overwrite`` is False nothing is trained. Otherwise a new
    model is built, trained (optionally with augmented training images), evaluated on the test set, saved to
    ``path`` and its accuracy curves are plotted (without calling ``plt.show()``).

    :param train_data: tuple ``(images, labels)``; split 83.3% / 16.7% into training and validation parts when
        ``val_data`` is None
    :param test_data: tuple ``(test_images, test_labels)`` used for the final evaluation
    :param path: location of the saved model (directory or single file)
    :param overwrite: retrain and replace an existing saved model
    :param epochs: number of training epochs
    :param val_data: optional tuple ``(val_images, val_labels)``
    :param data_augmentation: augment the training images with random rotation, zoom, shifts and horizontal flips
    :param img_size: height and width of the input images
    :return: None
    """
    (test_images, test_labels) = test_data

    model_exists = os.path.exists(path)
    if model_exists and not overwrite:
        print("Model found, there is no need to re-train the model ...")
        return

    if model_exists:
        # A saved model can be a directory or a single file; rmtree alone fails on files.
        if os.path.isdir(path):
            shutil.rmtree(path)
        else:
            os.remove(path)
        print("TRAIN ANYWAY option enabled, create and train a new one ...")
    else:
        print("Model not found, create and train a new one ...")

    # Only prepare the training/validation data when a model is actually going to be trained.
    if val_data is None:
        (x, y) = train_data
        train_images, val_images, train_labels, val_labels = train_test_split(
            x, y, test_size=0.167, train_size=0.833)
    else:
        (train_images, train_labels) = train_data
        (val_images, val_labels) = val_data

    model = models.Sequential()
    model.add(layers.Conv2D(32, (3, 3), strides=1, padding='same', activation='relu',
                            input_shape=(img_size, img_size, 1)))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPool2D((2, 2), strides=2, padding='same'))
    model.add(layers.Conv2D(64, (3, 3), strides=1, padding='same', activation='relu'))
    model.add(layers.Dropout(0.1))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPool2D((2, 2), strides=2, padding='same'))
    model.add(layers.Conv2D(64, (3, 3), strides=1, padding='same', activation='relu'))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPool2D((2, 2), strides=2, padding='same'))
    model.add(layers.Conv2D(128, (3, 3), strides=1, padding='same', activation='relu'))
    model.add(layers.Dropout(0.2))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPool2D((2, 2), strides=2, padding='same'))
    model.add(layers.Conv2D(256, (3, 3), strides=1, padding='same', activation='relu'))
    model.add(layers.Dropout(0.2))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPool2D((2, 2), strides=2, padding='same'))
    model.add(layers.Flatten())
    model.add(layers.Dense(units=128, activation='relu'))
    model.add(layers.Dense(units=1, activation='sigmoid'))
    model.compile(optimizer="rmsprop", loss='binary_crossentropy', metrics=['accuracy'])
    model.summary()

    learning_rate_reduction = ReduceLROnPlateau(monitor='val_accuracy',
                                                patience=2,
                                                verbose=1,
                                                factor=0.3,
                                                min_lr=0.000001)

    if data_augmentation:
        # Augmentation counters overfitting and the class imbalance in the dataset. No dataset statistics
        # are used (no featurewise/ZCA options), so the generator does not need to be fitted.
        datagen = ImageDataGenerator(
            rotation_range=30,        # randomly rotate images by up to 30 degrees
            zoom_range=0.2,           # randomly zoom images
            width_shift_range=0.1,    # randomly shift horizontally (fraction of total width)
            height_shift_range=0.1,   # randomly shift vertically (fraction of total height)
            horizontal_flip=True,     # randomly flip images horizontally
            vertical_flip=False)
        # Only the training images are augmented; validation runs on the untouched images so that
        # val_accuracy (monitored by the learning-rate schedule) is not distorted.
        training_history = model.fit(datagen.flow(train_images, train_labels, batch_size=32),
                                     epochs=epochs,
                                     validation_data=(val_images, val_labels),
                                     callbacks=[learning_rate_reduction])
    else:
        # Without augmentation there is no generator; train on the arrays directly.
        training_history = model.fit(train_images,
                                     train_labels,
                                     batch_size=32,
                                     epochs=epochs,
                                     validation_data=(val_images, val_labels),
                                     callbacks=[learning_rate_reduction])

    test_loss, test_accuracy = model.evaluate(test_images, test_labels, verbose=2)
    print("Final Accuracy achieved is: ", test_accuracy)
    model.save(path)
    print("Model has been saved")

    plt.plot(training_history.history['accuracy'], label="Accuracy")
    plt.plot(training_history.history['val_accuracy'], label='val_accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.ylim([0.7, 1])
    plt.legend(loc='lower right')
    #plt.show()
def build_IO_GEN(ae, dsvdd, latent_dim, lr, m):
    """
    Assemble the discriminator, generator and combined GAN models.

    * ``dsc``: a clone of the autoencoder's encoder (``clone_model`` creates new, re-initialised weights) with
      a single sigmoid output, compiled with binary cross-entropy and Adam(``lr``); frozen afterwards.
    * ``gen``: a dense layer on a ``latent_dim`` input followed by every layer of ``ae`` from the one named
      'encoded' onwards.
    * ``gan``: ``gen`` feeding both the frozen ``dsvdd`` (feature-matching loss, weight 10) and the frozen
      ``dsc`` (binary cross-entropy, weight 1), optimised with Adam at four times ``lr``.

    :param ae: autoencoder model containing a layer named 'encoded'
    :param dsvdd: model re-wrapped under the name 'DSVDD' and frozen
    :param latent_dim: shape of the generator input
    :param lr: discriminator learning rate
    :param m: the discriminator input has ``2 * m`` channels (64 x 64 x 2m)
    :return: tuple ``(gan, gen, dsc)``
    """
    encoder_layer_name = 'encoded'
    gen_lr = lr * 2 * 2

    # Input shape of the 'encoded' layer (batch dimension dropped): the size the latent vector is expanded to.
    gen_dim = ae.get_layer(encoder_layer_name).input.shape[1:]

    # ---------------- discriminator (DSC) ----------------
    ae_encoder = keras.Model(inputs=ae.input,
                             outputs=ae.get_layer('encoded').output,
                             name='DCAE_Encoder')
    encoder = keras.models.clone_model(ae_encoder)  # same architecture, re-initialised weights
    l2_norm = 1e-4

    d_x = keras.Input(shape=(64, 64) + (2 * m, ), name='d_x')
    dsc_out = encoder(d_x)
    dsc_out = layers.Flatten()(dsc_out)
    dsc_out = layers.Dense(1,
                           kernel_regularizer=keras.regularizers.l2(l2_norm),
                           activation='sigmoid')(dsc_out)

    dsc = keras.Model(inputs=d_x, outputs=dsc_out, name='DSC')
    dsc.compile(loss=['binary_crossentropy'],
                metrics=[smooth_accuracy],
                optimizer=keras.optimizers.Adam(learning_rate=lr, beta_1=0.5))
    print(dsc.summary())
    # Frozen after compiling, i.e. before it is embedded in the combined model below.
    dsc.trainable = False

    # ---------------- regulariser (DSVDD) ----------------
    dsvdd = keras.Model(inputs=dsvdd.input, outputs=dsvdd.output, name='DSVDD')
    dsvdd.trainable = False

    # ---------------- generator (GEN) ----------------
    gen_in = keras.Input(shape=latent_dim, name='g_x')
    gen_out = layers.Dense(np.prod(gen_dim), activation='relu')(gen_in)
    # Re-use the autoencoder layers starting at (and including) the 'encoded' layer.
    reached_encoded = False
    for ae_layer in ae.layers:
        if ae_layer.name == encoder_layer_name:
            reached_encoded = True
        if reached_encoded:
            gen_out = ae_layer(gen_out)

    gen = keras.Model(inputs=gen_in, outputs=gen_out, name='gen')
    print(gen.summary())

    # ---------------- combined model (GAN) ----------------
    gan_opt = keras.optimizers.Adam(learning_rate=gen_lr, beta_1=.5)
    gan_in = keras.Input(shape=latent_dim, name='g_x')
    x_star = gen(gan_in)
    validity = dsc(x_star)
    feat = dsvdd(x_star)

    gan = keras.Model(gan_in, [feat, validity], name="gan")
    gan.compile(loss={'DSVDD': feat_matching_loss, 'DSC': 'binary_crossentropy'},
                metrics={'DSC': 'accuracy'},
                loss_weights={'DSVDD': 10., 'DSC': 1.},
                optimizer=gan_opt)

    return gan, gen, dsc
def _count_fully_correct(logits, labels, groups=4, classes=62):
    """Count samples whose every group-wise argmax matches the labels.

    The first ``groups * classes`` columns of ``logits`` and ``labels`` are
    split into ``groups`` consecutive slices of ``classes`` values; a sample
    counts only if the argmax of every slice agrees.
    """
    width = groups * classes
    predicted = tf.argmax(
        tf.reshape(logits[:, :width], (-1, groups, classes)), axis=-1)
    expected = tf.argmax(
        tf.reshape(labels[:, :width], (-1, groups, classes)), axis=-1)
    all_match = tf.reduce_all(tf.equal(predicted, expected), axis=-1)
    return int(tf.reduce_sum(tf.cast(all_match, tf.int32)))


def main():
    """Continue training the two saved models and checkpoint on improvement.

    Loads ``yzm_conv.h5`` and ``yzm_logits.h5``, trains both jointly on the
    module-level ``x_tarin`` dataset with sigmoid cross-entropy using the
    module-level ``optimizer``, and every 10th epoch measures accuracy on
    the module-level ``x_text`` dataset.  A sample is correct only when all
    four 62-wide output groups have the same argmax as the label
    (presumably a four-character captcha with 62 classes per character --
    TODO confirm).  Both models are saved whenever that accuracy exceeds the
    best value seen so far, starting from 0.4325.

    :return: None
    """
    model_logits = tf.keras.models.load_model('yzm_logits.h5', compile=False)
    model_conv = tf.keras.models.load_model('yzm_conv.h5', compile=False)

    # Kept for reference: presumably the definitions the two .h5 files were
    # first created from (verify before relying on it).
    # model_conv = keras.Sequential([
    #     layers.Conv2D(32, 3, padding='same'),
    #     layers.BatchNormalization(),
    #     layers.Activation("relu"),
    #     layers.MaxPool2D(pool_size=[2, 2], strides=2, padding="same"),
    #
    #     layers.Conv2D(32, 3, padding='same'),
    #     layers.BatchNormalization(),
    #     layers.Activation("relu"),
    #     layers.MaxPool2D(pool_size=[2, 2], strides=2, padding="same"),
    #
    #     layers.Conv2D(32, 3, padding='same'),
    #     layers.BatchNormalization(),
    #     layers.Activation("relu"),
    #     layers.MaxPool2D(pool_size=[2, 2], strides=2, padding="same"),
    # ])
    # model_logits = keras.Sequential([
    #     layers.Dropout(0.2),
    #     layers.Dense(800),
    #     layers.Activation("relu"),
    #     layers.Dropout(0.4),
    #     layers.Dense(62 * 4),
    # ])
    # model_conv.build(input_shape=(None, 24, 72, 3))
    # model_logits.build(input_shape=(None, 864))

    variables = (model_conv.trainable_variables +
                 model_logits.trainable_variables)
    # Flatten has no weights, so one instance serves training and evaluation.
    flatten = layers.Flatten()
    best_accuracy = 0.4325

    for epoch in range(300000):
        for x, y in x_tarin:
            with tf.GradientTape() as tape:
                features = flatten(model_conv(x, training=True))
                logits = model_logits(features, training=True)
                labels = tf.cast(y, dtype=tf.float32)
                # NOTE: an L2 penalty (0.0000005 * sum of tf.nn.l2_loss over
                # variables[::2]) used to be computed here but was never
                # added to the loss; re-add it if regularisation is wanted.
                loss = tf.reduce_mean(
                    tf.nn.sigmoid_cross_entropy_with_logits(logits=logits,
                                                            labels=labels))
            grads = tape.gradient(loss, variables)
            optimizer.apply_gradients(zip(grads, variables))
        print(epoch)

        if epoch % 10 != 0:
            continue

        # Loss of the last training batch of this epoch.
        print("loss:", float(loss))
        correct = 0
        total = 0
        for x, y in x_text:
            features = flatten(model_conv(x, training=False))
            logits = model_logits(features, training=False)
            correct += _count_fully_correct(logits, y)
            total += x.shape[0]

        # Guard against an empty validation set instead of dividing by zero.
        accuracy = correct / total if total else 0.0
        print("true_1", accuracy)
        if accuracy > best_accuracy:
            model_conv.save("yzm_conv.h5")
            model_logits.save("yzm_logits.h5")
            best_accuracy = accuracy
            print(best_accuracy)
all_max_heights = [] # so i can see the progression if there is any training_states = np.array([]) training_outputs = np.array([]) # -------------------------------preprocessing------------------------------- max_acceleration = 100 max_velocity = 1000 max_height = 100000 max_fuel = 10 max_time = 1000 # -------------------------------model architecture------------------------------- # try making the shape (None, 6) so I can put the entire rocket # state/output history as 1 data point instead of putting many model = models.Sequential([ layers.Flatten(input_shape=(6,)), layers.Dense(20, activation='relu'), layers.Dense(1, activation='sigmoid') # softmax is for classification of many different things # (like classifying between 10 different animals) # sigmoid compresses the value between 0 and 1 but not for classification ]) model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy']) # -------------------------------checkpoints------------------------------- checkpoint_dir = '../../rocket_v1_checkpoints' checkpoint_prefix = os.path.join(checkpoint_dir, 'ckpt_{epoch}') checkpoint_callback = tf.keras.callbacks.ModelCheckpoint( filepath=checkpoint_prefix,
# Load the MNIST dataset (labeled hand-written digits from 0 to 9; each image
# is 28x28 pixels). load_data() returns it already split into training and
# testing data.
(images_train, labels_train), (images_test, labels_test) = (
    datasets.mnist.load_data())
print('Data processing complete.')

# Creating the model is very easy in TensorFlow. You can add as many layers
# as you wish, provided the input shape and the number of output units stay
# the same for a specific problem.
model = models.Sequential([
    # Flatten each 2D (28x28 px) image into a 1D (28 * 28 = 784) array.
    layers.Flatten(input_shape=(28, 28)),
    # Hidden layers 1-3: fully connected (Dense) layers of 128, 128 and 32
    # units, each with elu activation for non-linearity.
    *(layers.Dense(units, activation='elu') for units in (128, 128, 32)),
    # Output layer: 10 units with softmax activation.
    layers.Dense(10, activation='softmax'),
])

# Specify the loss function and the optimizer you want to use before fitting
# the model. Many other loss functions and optimizers are available; see the
# tensorflow.org docs for more information.
(train_images, train_labels), (test_images, test_labels) = datasets.cifar10.load_data() # Normalize pixel values to be between 0 and 1 train_images, test_images = train_images / 255.0, test_images / 255.0 # Model structure model = models.Sequential() model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 3))) model.add(layers.MaxPooling2D((2, 2))) model.add(layers.Conv2D(64, (3, 3), activation='relu')) model.add(layers.MaxPooling2D((2, 2))) model.add(layers.Conv2D(64, (3, 3), activation='relu')) model.add(layers.Flatten()) model.add(layers.Dense(64 * 3, activation='relu')) model.add(layers.Dense(64, activation='relu')) model.add(layers.Dense(32, activation='relu')) model.add(layers.Dense(10)) #according to cifar number of outputs # Model compilation model.compile( optimizer='adam', loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), metrics=['accuracy']) # Model training history = model.fit(train_images, train_labels, epochs=15,
def __init__(self,
             model_name,
             klass_name,
             embedding_matrix,
             embedding_size=EMBEDDING_SIZE,
             input_length=MAX_DOCUMENT_LENGTH):
    """Build and compile the 1-D convolutional two-class text model.

    The network stored in ``self.model`` is a ``Sequential`` named
    ``'<model_name>-model'``: a frozen embedding initialised from
    ``embedding_matrix``, three Conv1D + Dropout stages, a Flatten, two
    Dense + Dropout stages and a 2-unit softmax output.  Every Conv1D and
    Dense kernel is constrained with ``MaxNorm(max_value=3)``.

    :param model_name: prefix used for the Keras model name.
    :param klass_name: stored unchanged on ``self.klass_name``.
    :param embedding_matrix: array whose first dimension is the vocabulary
        size and whose values initialise the embedding weights.
    :param embedding_size: embedding output dimension (presumably has to
        equal ``embedding_matrix.shape[1]`` -- verify against callers).
    :param input_length: sequence length passed to the embedding layer.
    """
    self.klass_name = klass_name
    self.model = models.Sequential(name=f'{model_name}-model')
    net = self.model

    def max_norm():
        # A separate constraint object per layer, as each layer had before.
        return constraints.MaxNorm(max_value=3)

    # Frozen embedding. For a trainable embedding layer instead:
    # model.add(layers.Embedding(len(tokenizer.word_index)+1, embedding_size, input_length=MAX_DOCUMENT_LENGTH))
    net.add(
        layers.Embedding(
            embedding_matrix.shape[0],
            embedding_size,
            input_length=input_length,
            embeddings_initializer=initializers.Constant(embedding_matrix),
            trainable=False))
    net.add(layers.Dropout(0.1))

    # (filters, kernel_size, strides) of each convolutional stage.
    conv_stages = ((16, 4, 1), (12, 8, 2), (8, 16, 2))
    for filters, kernel_size, strides in conv_stages:
        net.add(
            layers.Convolution1D(filters,
                                 kernel_size=kernel_size,
                                 activation='relu',
                                 strides=strides,
                                 padding='same',
                                 kernel_constraint=max_norm()))
        net.add(layers.Dropout(0.5))

    net.add(layers.Flatten())

    for units in (128, 64):
        net.add(
            layers.Dense(units,
                         activation='relu',
                         kernel_constraint=max_norm()))
        net.add(layers.Dropout(0.5))

    net.add(
        layers.Dense(2, activation='softmax', kernel_constraint=max_norm()))

    net.compile(
        optimizer=optimizers.Adam(),  #learning_rate=0.001),
        loss=losses.CategoricalCrossentropy(from_logits=False),
        metrics=[
            metrics.CategoricalAccuracy(),
            metrics.Recall(class_id=0),
            metrics.Precision(class_id=0)
        ])