def slim_net():
    """Build and compile the slim encoder-decoder network.

    The graph takes a 512x512x3 input and consists of a two-convolution
    stem, 22 ``encode_bottleneck`` blocks, three ``decode_bottleneck``
    blocks and two stride-2 transposed convolutions ending in a 2-filter
    layer without activation (the loss is configured with
    ``from_logits=True``).

    The extra values returned by ``encode_bottleneck`` when ``pool=True``
    (``r1``..``r3``) or ``preluop=True`` (``p1``, ``p2``) are handed to the
    decoder blocks as ``res1`` / ``res2``.

    Returns:
        The compiled Keras ``Model`` (Adam optimizer, sparse categorical
        cross-entropy on logits, accuracy metric).
    """
    # Initial spatial phase: two stride-2 convolutions reduce the input by
    # a factor 1/4 per spatial dimension.
    image = Input(shape=(512, 512, 3), name='ip')
    net = Conv2D(filters=8, kernel_size=2, strides=2, padding='valid')(image)
    net = PReLU(shared_axes=[1, 2])(net)
    net = Conv2D(filters=32, kernel_size=2, strides=2, padding='valid')(net)

    # Keyword sets shared by many of the encoder calls below.
    keep_size = dict(strides=1, pool=False)
    light = dict(separable=True, depthwise=True, **keep_size)
    plain = dict(separable=False, depthwise=False, **keep_size)
    downsample = dict(strides=2, separable=True, depthwise=True, pool=True)

    # Encoder blocks 1-3 (out_ch=64).
    net, r1 = encode_bottleneck(net, proj_ch=16, out_ch=64, **downsample)
    net, p1 = encode_bottleneck(net, proj_ch=16, out_ch=64, preluop=True,
                                **light)
    net = encode_bottleneck(net, proj_ch=16, out_ch=64, **light)

    # Encoder blocks 4-8 (out_ch=128).
    net, r2 = encode_bottleneck(net, proj_ch=32, out_ch=128, **downsample)
    net, p2 = encode_bottleneck(net, proj_ch=16, out_ch=128, preluop=True,
                                **light)
    for _ in range(3):
        net = encode_bottleneck(net, proj_ch=16, out_ch=128, **light)

    # Encoder block 9: last strided block.
    net, r3 = encode_bottleneck(net, proj_ch=16, out_ch=128, **downsample)

    # Encoder blocks 10-21: the same six-block pattern applied twice, with
    # dilations 2, 4 and 8 on every second block.
    pattern = (
        light,
        dict(dilation=2, **plain),  # dil -2
        dict(separable=True, depthwise=False, **keep_size),
        dict(dilation=4, **plain),  # dil -4
        light,
        dict(dilation=8, **plain),  # dil -8
    )
    for _ in range(2):
        for options in pattern:
            net = encode_bottleneck(net, proj_ch=8, out_ch=128, **options)

    # Encoder block 22.
    net = encode_bottleneck(net, proj_ch=4, out_ch=128, **plain)  # dil -1

    # Decoder.
    net = decode_bottleneck(net, res1=r3, res2=p2, proj_ch1=8, proj_ch2=8,
                            out_ch=128, strides=1, rsize=32, pconv=True)
    net = decode_bottleneck(net, res1=r2, res2=p1, proj_ch1=8, proj_ch2=4,
                            out_ch=64, strides=1, rsize=64, pconv=True)
    net = decode_bottleneck(net, res1=r1, res2=None, proj_ch1=4, proj_ch2=4,
                            out_ch=32, strides=1, rsize=128, pconv=False)

    # Two stride-2 transposed convolutions back to the input resolution.
    net = PReLU(shared_axes=[1, 2])(net)
    net = Conv2DTranspose(filters=8, kernel_size=2, strides=2,
                          padding='same')(net)  # output size: 256
    net = PReLU(shared_axes=[1, 2])(net)
    logits = Conv2DTranspose(filters=2, kernel_size=2, strides=2,
                             padding='same')(net)  # output size: 512

    model = Model(inputs=image, outputs=logits)
    # Ensure you have sparse labels: the loss expects integer class ids.
    model.compile(
        optimizer='adam',
        loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        metrics=['accuracy'])
    return model
padding='valid', activation='relu')(inputs_img) conv_layer_02 = Conv2D(filters=8, kernel_size=(50, 50), padding='valid', activation='relu')(conv_layer_01) max_pool = MaxPooling2D(pool_size=(4, 4), strides=None, padding="valid")(conv_layer_01) avg_pool = AveragePooling2D(pool_size=(3, 3), strides=None, padding="valid")(max_pool) outputs = Flatten()(Conv2D(filters=4, kernel_size=(17, 17), padding='valid', activation='relu')(avg_pool)) model = Model(inputs=inputs_img, outputs=outputs) model.compile(loss=tf.keras.losses.MSE, optimizer=tf.keras.optimizers.Adam(lr=0.0001), metrics=['MeanSquaredError']) model.fit_generator(generator=training_generator, validation_data=validation_generator, epochs=3 #,use_multiprocessing=True, # workers=6 ) model.save(DATA_PATH + 'work/best_model.h5') saved_model = tf.keras.models.load_model(DATA_PATH + 'work/best_model.h5') saved_model.summary()
# In[7]: x = Flatten()(vgg16_model.output) x = Dense(1000, activation='relu')(x) prediction = Dense(4, activation='softmax')(x) model = Model(inputs=vgg16_model.input, outputs=prediction) # In[8]: model.summary() # In[9]: model.compile(Adam(lr=0.0001), loss='sparse_categorical_crossentropy', metrics=['accuracy']) # In[30]: tensorboard = callbacks.TensorBoard(log_dir='tb_logs', histogram_freq=0, batch_size=16, write_grads=True, write_graph=True) model_checkpoints = callbacks.ModelCheckpoint( "model_checkpoints/checkpoint-{val_loss:.3f}.h5", monitor='val_loss', verbose=0, save_best_only=True, save_weights_only=False,
# Add a Dropout layer
x = Dropout(0.5)(x)

# Add a Dense layer that uses softmax to produce the per-class probabilities
output_layer = Dense(NUM_CLASSES, activation='softmax', name='softmax')(x)

# Choose which layers are frozen and which ones are trained: the first
# FREEZE_LAYERS layers are frozen, the remaining ones stay trainable
net_final = Model(inputs=net.input, outputs=output_layer)
for layer in net_final.layers[:FREEZE_LAYERS]:
    layer.trainable = False
for layer in net_final.layers[FREEZE_LAYERS:]:
    layer.trainable = True

# Use the Adam optimizer with a low learning rate for fine-tuning
net_final.compile(
    optimizer=Adam(lr=1e-5),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# TensorBoard logs go to a directory named after the current timestamp
logdir = os.path.join(
    "logs",
    datetime.datetime.now().strftime("%Y%m%d-%H%M%S"),
)
tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir, histogram_freq=1)

# Print the whole network structure
print(net_final.summary())

# Train the model
net_final.fit_generator(
    train_batches,
    steps_per_epoch=train_batches.samples // BATCH_SIZE,
    validation_data=valid_batches,
    validation_steps=valid_batches.samples // BATCH_SIZE,
    epochs=NUM_EPOCHS,
    callbacks=[tensorboard_callback],
)

# Save the trained model
def build(self, hp):
    """Build and compile a 1-D CNN classifier from tuner hyperparameters.

    Hyperparameters requested from ``hp`` (in this order): ``nb_filters_0``,
    ``kernel_initializer``, ``learning_rate``, ``nb_dense_neurons_1``,
    ``regularization_value``, ``reg_dense`` and ``dropout``.

    The network takes inputs of shape ``(10, 1)``, applies three
    Conv1D -> BatchNormalization -> ReLU -> Dropout blocks with 1x, 2x and
    4x the base filter count, then Flatten, one Dense layer, Dropout and a
    7-unit softmax output.

    Args:
        hp: Hyperparameter container providing ``Choice``, ``Float`` and
            ``Int``.

    Returns:
        The model compiled with categorical cross-entropy, Adam and the
        accuracy metric.
    """
    base_filters = hp.Choice("nb_filters_0", values=[16, 32, 64, 128])
    conv_kernel = 3
    initializer = hp.Choice("kernel_initializer",
                            values=["glorot_uniform", "he_normal"])
    learning_rate = hp.Float("learning_rate", min_value=1e-4, max_value=1e-2,
                             sampling="LOG", default=1e-3)
    dense_units = hp.Int("nb_dense_neurons_1", min_value=10, max_value=500,
                         step=10, default=100)
    conv_penalty = hp.Float("regularization_value", min_value=1e-4,
                            max_value=1, sampling="LOG", default=1e-2)
    dense_penalty = hp.Float("reg_dense", min_value=1e-4, max_value=1,
                             sampling="LOG", default=1e-2)
    drop_rate = hp.Float("dropout", min_value=0., max_value=0.9, step=0.1,
                         default=0.5)

    inputs = Input(shape=(10, 1))
    n_outputs = 7

    # Three convolutional blocks; no pooling is applied between them (the
    # MaxPooling1D step is disabled).
    features = inputs
    for multiplier in (1, 2, 4):
        features = Conv1D(filters=base_filters * multiplier,
                          kernel_size=conv_kernel,
                          kernel_initializer=initializer,
                          kernel_regularizer=l2(conv_penalty),
                          padding='valid')(features)
        features = BatchNormalization()(features)
        features = Activation("relu")(features)
        features = Dropout(drop_rate)(features)

    # Classification head.
    features = Flatten()(features)
    features = Dense(dense_units,
                     kernel_regularizer=l2(dense_penalty))(features)
    features = Dropout(drop_rate)(features)
    probabilities = Dense(n_outputs, activation="softmax")(features)

    model = Model(inputs=inputs, outputs=probabilities)
    model.compile(loss="categorical_crossentropy",
                  optimizer=Adam(learning_rate=learning_rate),
                  metrics=["accuracy"])
    return model
# Labels as a column vector.
y_test = np.array(y_test).reshape(-1, 1)
x_train.shape, y_train.shape, x_test.shape, y_test.shape

# Build the NPLM model.
EMB_SIZE = 32
VOCAB_SIZE = len(word2idx) + 1

x_input = Input(batch_shape=(None, x_train.shape[1]))
# The C matrix can be supplied through the `weights` option (pre-training).
x_embed = Embedding(input_dim=VOCAB_SIZE, output_dim=EMB_SIZE)(x_input)
x_embed = Dropout(0.5)(x_embed)
x_lstm = LSTM(64, dropout=0.5)(x_embed)
y_output = Dense(n_topic, activation='softmax')(x_lstm)

# Model used for both training and prediction.
model = Model(x_input, y_output)
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=optimizers.Adam(learning_rate=0.01),
)
model.summary()

# Train the model (pre-train, then fine-tune).
hist = model.fit(
    x_train,
    y_train,
    validation_data=(x_test, y_test),
    batch_size=512,
    epochs=30,
)

# Plot the loss history.
plt.plot(hist.history['loss'], label='Train loss')
plt.plot(hist.history['val_loss'], label='Test loss')
plt.legend()
plt.title("Loss history")
plt.xlabel("epoch")
model = Model(time_input, time_out)
model.summary()

if show:
    # Rendering the graph requires graphviz.
    plot_model(
        model,
        to_file="s-model.png",
        show_shapes=True,
        expand_nested=True,
    )
    img = mpimg.imread('s-model.png')
    imgplot = plt.imshow(img)
    plt.show()

# Train with the contrastive loss and RMSprop at its default settings.
rms = RMSprop()
model.compile(loss=contrastive_loss, optimizer=rms, metrics=[accuracy])
history = model.fit(
    tr_pairs,
    tr_y,
    batch_size=128,
    epochs=epochs,
    validation_data=(te_pairs, te_y),
)

# Plot training & validation accuracy values
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('Model accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend(['Train', 'Test'], loc='upper left')
plt.show()
kernel_size=sz, padding="valid", activation="relu", strides=1)(z) conv = GlobalMaxPooling1D()(conv) conv = Flatten()(conv) conv_blocks.append(conv) z = Concatenate()(conv_blocks) if len(conv_blocks) > 1 else conv_blocks[0] z = Dropout(drop)(z) model_output = Dense(len(label_idx), activation='softmax')(z) model = Model(model_input, model_output) model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc']) print(model.summary()) history = model.fit( X_train, y_train, batch_size=100, # 1회 학습시 주는 데이터 개수 epochs=20, # 전체 데이타 10번 학습 validation_data=(X_val, y_val)) epochs = range(1, len(history.history['acc']) + 1) plt.plot(epochs, history.history['acc']) plt.plot(epochs, history.history['val_acc']) plt.title('model accuracy')
conf.train_images, '', '') x_data_train, y_data_train = data_processor.load_train(normalize=True).get_training_data() #initialize the network input_layer = Input(shape=(784,), name='input') network = Dense(152, activation='tanh', name='dense_1')(input_layer) network = Dense(76, activation='tanh', name='dense_2')(network) network = Dense(38, activation='tanh', name='dense_3')(network) network = Dense(4, activation='tanh', name='dense_4')(network) network = Dense(38, activation='tanh', name='dense_5')(network) network = Dense(76, activation='tanh', name='dense_6')(network) network = Dense(152, activation='tanh', name='dense_7')(network) output = Dense(784, activation='tanh', name='output')(network) autoencoder = Model(inputs=input_layer, outputs=output, name='autoencoder') autoencoder.compile(optimizer=optimizers.Adadelta(learning_rate=1.0), loss='MSE', metrics=['accuracy']) # Create a callback that saves the model's weights cp_callback = ModelCheckpoint(filepath=conf.checkpoint_path, save_weights_only=True, verbose=1) #load an existing model to continue training if(not FLAGS.rebuild): try: autoencoder.load_weights(conf.checkpoint_path) except: print('No checkpoint found, building filters from scratch.') #run the training autoencoder.fit(x_data_train, x_data_train, epochs=conf.epochs, batch_size=conf.batch_size,
y_pred, labels, input_length, label_length = args return K.ctc_batch_cost(labels, y_pred, input_length, label_length) labels = Input(name='the_labels', shape=[max_plate_len], dtype='float32') input_len = Input(name='input_length', shape=[1], dtype='int64') label_len = Input(name='label_length', shape=[1], dtype='int64') loss_out = Lambda(ctc_lambda_func, output_shape=(1, ), name='ctc')([y_pred, labels, input_len, label_len]) #sgd = SGD(lr=0.02, decay=1e-6, momentum=0.9, nesterov=True) model = Model(inputs=[inp, labels, input_len, label_len], outputs=loss_out) # the loss calc occurs elsewhere, so use a dummy lambda func for the loss model.compile(loss={'ctc': lambda y_tru, y_prd: y_prd}, optimizer='adam') def train(ep): model.fit_generator(generator=train_gen.next_batch(), steps_per_epoch=train_gen.batch_size, epochs=ep, validation_data=valid_gen.next_batch(), validation_steps=valid_gen.batch_size) #train(1) def save_model(): model_json = model.to_json()
layer.trainable = False k = k + 1 # Configuring the FC layers and output #flat = Flatten()(baseModel.output) # Add a FC NN hidden1 = Dense(512, activation='relu')(baseModel.output) drop1 = Dropout(0.5)(hidden1) hidden2 = Dense(512, activation='relu')(drop1) drop2 = Dropout(0.5)(hidden2) output = Dense(num_classes, activation='softmax')(drop2) # new model model = Model(inputs=baseModel.inputs, outputs=output) # Compile the model model.compile(optimizer=optimizers.Adam(lr=learning_rate), loss='categorical_crossentropy', metrics=['accuracy']) print(model.summary()) # Callbacks # Specify the log directory to the tensorboard tensorboard = TensorBoard(log_dir="/home/test/workspace/Fan/src/logs/{}". format(datetime.now().strftime("%d-%m-%Y_%H:%M:%S"))) cp_filepath = "/home/test/workspace/Fan/src/checkpoints/best_model_{}.h5".format( datetime.now().strftime("%d-%m-%Y_%H:%M:%S")) checkpoint = ModelCheckpoint(filepath=cp_filepath, monitor='val_accuracy', verbose=1, save_best_only=False, save_weights_only=False,
def create_model_fn(params, is_optimizer_adam=True):
    """Build and compile a Keras model described by ``params``.

    The backbone is chosen by ``params.model_architecture`` ('mobilenet',
    'resnet', 'densenet' or 'convnet').  A head is then attached:

    * ``params.loss == 'triplet_semihard_loss'``: global average pooling,
      dropout, a 1x1 convolution to ``params.embedding_hidden_dim`` and L2
      normalisation; the model is trained with ``triplet_semihard_loss``
      and no metrics.
    * otherwise, if ``params.pretrained``: global average pooling, dropout
      and a softmax classifier over ``params.nb_classes``.
    * otherwise the backbone is used unchanged.

    Args:
        params: Configuration object.  Attributes read here: ``image_dim``,
            ``pretrained``, ``nb_layers_to_freeze``, ``loss``,
            ``embedding_hidden_dim``, ``model_architecture``, ``dropout``,
            ``nb_classes``, ``triplet_margin`` and ``lr_rate``.
        is_optimizer_adam: Use ``Adam(lr=params.lr_rate)`` when true,
            otherwise the module-level ``sgd`` object.

    Returns:
        The compiled model.

    Raises:
        ValueError: If a pretrained model is requested with
            ``image_dim[0]`` other than 224 or 128; if
            ``nb_layers_to_freeze`` and ``pretrained`` are inconsistent; if
            the triplet loss is requested without ``embedding_hidden_dim``;
            or if the architecture name is unknown.
    """
    # Reject inconsistent configurations before any model is built.
    if params.image_dim[0] not in [224, 128] and params.pretrained:
        # need this for pretrained models.
        raise ValueError('hip to be square..')
    if (params.nb_layers_to_freeze and not params.pretrained) or (
            params.nb_layers_to_freeze == 0 and params.pretrained):
        raise ValueError(
            'set the pretrained to TRUE if nb_layers_to_freeze is specified')
    if (params.loss == 'triplet_semihard_loss'
            and not params.embedding_hidden_dim):
        raise ValueError(
            'set the embedding_hidden_dim if triplet_semihard_loss is specified')

    _include_top = True
    _weights = None
    _metrics = ['categorical_accuracy', top_5_accuracy]
    if params.pretrained:
        logger.info('pretrained..')
        # ImageNet weights are loaded without the stock classifier; a new
        # head is appended further down.
        _include_top = False
        _weights = 'imagenet'

    # Define the top of the architecture
    if params.model_architecture == 'mobilenet':
        base_model = tf.keras.applications.mobilenet.MobileNet(
            input_shape=params.image_dim,
            alpha=1.0,
            depth_multiplier=1,
            dropout=params.dropout,
            include_top=_include_top,
            weights=_weights,
            classes=params.nb_classes,
            input_tensor=None,
            pooling=None)
        x = base_model.output
        _inputs = base_model.input
        reshape_size = 1024
    elif params.model_architecture == 'resnet':
        base_model = tf.keras.applications.resnet50.ResNet50(
            input_shape=params.image_dim,
            include_top=_include_top,
            weights=_weights,
            classes=params.nb_classes)
        x = base_model.output
        _inputs = base_model.input
        reshape_size = 2048
    elif params.model_architecture == 'densenet':
        base_model = tf.keras.applications.densenet.DenseNet121(
            input_shape=params.image_dim,
            include_top=_include_top,
            weights=_weights,
            classes=params.nb_classes)
        x = base_model.output
        _inputs = base_model.input
        reshape_size = 1024
    elif params.model_architecture == 'convnet':
        # Generic convnet: five strided Conv -> BatchNorm -> ReLU6 stages.
        # NOTE(review): this branch never defines ``base_model``, so it only
        # works together with the triplet or pretrained head below.
        _inputs = layers.Input(shape=params.image_dim)
        filters = params.embedding_hidden_dim
        kernel = (3, 3)
        strides = (2, 2)
        x = _inputs
        for i in range(5):
            print(x)
            x = layers.Conv2D(filters, kernel,
                              padding='valid',
                              use_bias=False,
                              strides=strides,
                              name='conv{}'.format(i))(x)
            x = layers.BatchNormalization(axis=-1,
                                          name='conv{}_bn'.format(i))(x)
            x = layers.ReLU(6., name='conv{}_relu'.format(i))(x)
        reshape_size = filters
    else:
        raise ValueError("architecture not defined.")

    # If triplet loss, complete the structure
    if params.loss == 'triplet_semihard_loss':
        # re-set the metrics
        _metrics = None
        x = layers.GlobalAveragePooling2D()(x)
        shape = (1, 1, int(reshape_size * 1.0))
        x = layers.Reshape(shape, name='reshape_1')(x)
        x = layers.Dropout(params.dropout, name='dropout')(x)
        # retrieve embedding
        x = layers.Conv2D(params.embedding_hidden_dim, (1, 1),
                          padding='same', name='conv_embedding')(x)
        # l2 normalize
        x = layers.Reshape((params.embedding_hidden_dim,),
                           name='reshape_2')(x)
        x = layers.Lambda(
            lambda _x: tf.keras.backend.l2_normalize(_x, axis=1),
            name='conv_embedding_norm')(x)
        model = Model(inputs=_inputs, outputs=x)

        def _loss_fn(y_true, y_pred):
            # Labels are converted to class indices via argmax before being
            # passed to the triplet loss.
            y_true = tf.keras.backend.argmax(y_true, axis=-1)
            return triplet_semihard_loss(labels=y_true,
                                         embeddings=y_pred,
                                         margin=params.triplet_margin)

        _loss = _loss_fn
    # Complete the rest of the architecture if pretrained weights are loaded.
    # TODO: This should be marked as something like 'complete the top of
    # the architecture'.
    elif params.pretrained:
        logger.info("append the top to the structure..")
        _loss = 'categorical_crossentropy'
        x = layers.GlobalAveragePooling2D()(x)
        shape = (1, 1, int(reshape_size * 1.0))
        x = layers.Reshape(shape, name='reshape_1')(x)
        x = layers.Dropout(params.dropout, name='dropout')(x)
        x = layers.Conv2D(params.nb_classes, (1, 1),
                          padding='same', name='conv_preds')(x)
        x = layers.Reshape((params.nb_classes,), name='reshape_2')(x)
        x = layers.Activation('softmax', name='act_softmax')(x)
        model = Model(inputs=_inputs, outputs=x)
    else:
        # If neither, entire structure is defined already.
        _loss = 'categorical_crossentropy'
        model = base_model

    if params.nb_layers_to_freeze:
        for i, layer in enumerate(model.layers):
            if i < params.nb_layers_to_freeze:
                layer.trainable = False
            else:
                # Log the names of the layers that remain trainable.
                logger.info(layer.name)
        # Report against the real layer count, not the last loop index.
        logger.info("{} out of {} layers frozen..".format(
            params.nb_layers_to_freeze, len(model.layers)))

    if is_optimizer_adam:
        _opt = Adam(lr=params.lr_rate)
    else:
        # NOTE(review): ``sgd`` is not defined in this function; presumably
        # a module-level optimizer instance - confirm.
        _opt = sgd

    model.compile(optimizer=_opt,
                  loss=_loss,
                  metrics=_metrics
                  )
    # NOTE(review): Keras ``summary()`` prints the table itself and, as far
    # as documented, returns None, so this log record likely reads "None".
    tf.logging.info(model.summary())
    return model
# Output head: a 64-filter 1x1 ReLU convolution followed by a 4-filter 1x1
# softmax convolution.
pre_output = Conv2D(
    64, 1, padding='same', activation='relu', name='pre_output',
)(decod_block4_conv1)
output = Conv2D(
    4, 1, padding='same', activation='softmax', name='output',
)(pre_output)

modelUNet = Model(inputs=input_, outputs=output)
print(modelUNet.summary())

# Compile with dice_coefficient_loss as the loss and
# dice_coefficient_function as the metric.
print("About to compile...")
modelUNet.compile(
    optimizer=Adam(lr=1e-5),
    loss=dice_coefficient_loss,
    metrics=[dice_coefficient_function],
)

# Stop early once val_loss has not improved for two consecutive epochs.
callbacks = [tf.keras.callbacks.EarlyStopping(patience=2, monitor='val_loss')]

print("About to fit the model...")
history = modelUNet.fit(
    X_train,
    Y_train,
    validation_data=(X_val, Y_val),
    batch_size=16,
    epochs=5,
    shuffle=True,
    callbacks=callbacks,
)
modelUNet.save('./Saved_models/group-HGG-UNet.h5', overwrite=True)
def build_cnn_base_model(X_train, y_train, X_val, y_val, embedding_matrix,
                         maxlen, vocab_size, embedding_dim, learning_rate,
                         epochs=3):
    """Build, train and return a 1-D CNN binary text classifier.

    The network is a frozen embedding initialised from ``embedding_matrix``,
    four Conv1D(128, kernel 3, ReLU) + MaxPooling1D(2) stages, a 128-unit
    ReLU dense layer, dropout 0.2 and a single sigmoid output.  It is
    compiled with binary cross-entropy and Adam, then fitted with batch
    size 256.

    Args:
        X_train, y_train: Training inputs and targets passed to ``fit``.
        X_val, y_val: Validation inputs and targets passed to ``fit``.
        embedding_matrix: Initial (non-trainable) embedding weights.
        maxlen: Length of each input sequence.
        vocab_size: Embedding input dimension.
        embedding_dim: Embedding output dimension.
        learning_rate: Learning rate handed to Adam.
        epochs: Number of training epochs.

    Returns:
        Tuple ``(model, history)``: the fitted model and the object
        returned by ``fit``.
    """
    tokens = Input(shape=(maxlen, ))
    # need to change
    embedded = Embedding(vocab_size, embedding_dim,
                         weights=[embedding_matrix],
                         trainable=False)(tokens)

    # First stage: its pooling layer uses the default padding.
    features = Conv1D(filters=128, kernel_size=3, padding="same",
                      activation="relu")(embedded)
    features = MaxPooling1D(pool_size=2)(features)

    # Three further stages, each pooling with 'same' padding.
    for _ in range(3):
        features = Conv1D(filters=128, kernel_size=3, padding="same",
                          activation="relu")(features)
        features = MaxPooling1D(pool_size=2, padding="same")(features)

    features = Flatten()(features)
    features = Dense(128, activation="relu")(features)
    features = Dropout(0.2)(features)
    probability = Dense(1, activation="sigmoid")(features)

    model = Model(tokens, probability)
    print(model.summary())

    # train model
    model.compile(
        loss="binary_crossentropy",
        optimizer=optimizers.Adam(lr=learning_rate),
        metrics=["accuracy"],
    )
    history = model.fit(X_train, y_train,
                        epochs=epochs,
                        validation_data=(X_val, y_val),
                        batch_size=256)
    return model, history
class GAN:
    """Fully-connected GAN for 28x28x1 images, trained on MNIST by ``train``."""

    def __init__(self):
        """Create the discriminator, the generator and the stacked model."""
        self.img_rows = 28
        self.img_cols = 28
        self.channels = 1
        self.img_shape = (self.img_rows, self.img_cols, self.channels)
        self.latent_dim = 100

        optimizer = Adam(0.0002, 0.5)

        # The discriminator is compiled on its own, before its `trainable`
        # flag is cleared for the stacked model below.
        self.discriminator = self.build_discriminator()
        self.discriminator.compile(loss='binary_crossentropy',
                                   optimizer=optimizer,
                                   metrics=['accuracy'])

        # Build the generator
        self.generator = self.build_generator()

        # The generator takes noise as input and generates imgs
        latent = Input(shape=(self.latent_dim, ))
        generated = self.generator(latent)

        self.discriminator.trainable = False

        # The discriminator takes generated images as input and determines
        # validity
        verdict = self.discriminator(generated)

        # The combined model (stacked generator and discriminator)
        # Trains the generator to fool the discriminator
        self.combined = Model(latent, verdict)
        self.combined.compile(loss='binary_crossentropy', optimizer=optimizer)

    def build_generator(self):
        """Return a model mapping a latent vector to an image of img_shape."""
        net = Sequential()
        for position, units in enumerate((256, 512, 1024)):
            if position == 0:
                # Only the first layer declares the input size.
                net.add(Dense(units, input_dim=self.latent_dim))
            else:
                net.add(Dense(units))
            net.add(LeakyReLU(alpha=0.2))
            net.add(BatchNormalization(momentum=0.8))
        net.add(Dense(np.prod(self.img_shape), activation='tanh'))
        net.add(Reshape(self.img_shape))
        net.summary()  # print the network structure

        latent = Input(shape=(self.latent_dim, ))
        return Model(latent, net(latent))

    def build_discriminator(self):
        """Return a model mapping an image to a single sigmoid output."""
        net = Sequential()
        net.add(Flatten(input_shape=self.img_shape))
        for units in (512, 256):
            net.add(Dense(units))
            net.add(LeakyReLU(alpha=0.2))
        net.add(Dense(1, activation='sigmoid'))
        net.summary()

        image = Input(shape=self.img_shape)
        return Model(image, net(image))

    def train(self, epochs, batch_size=128, sample_interval=50):
        """Alternate discriminator and generator updates on MNIST.

        Args:
            epochs: Number of iterations; each one trains on a single batch.
            batch_size: Number of real and of generated images per step.
            sample_interval: ``sample_images`` is called whenever
                ``epoch + 1`` is a multiple of this value.
        """
        # Load the dataset
        (X_train, _), (_, _) = mnist.load_data()

        # Rescale -1 to 1 and append a channel axis
        X_train = X_train / 127.5 - 1.
        X_train = np.expand_dims(X_train, axis=3)

        config = tensorflow.ConfigProto()
        config.gpu_options.allow_growth = True  # allow GPU memory growth
        set_session(tensorflow.Session(config=config))

        # Adversarial ground truths
        valid = np.ones((batch_size, 1))
        fake = np.zeros((batch_size, 1))

        for epoch in range(epochs):
            # ---------------------
            #  Train Discriminator
            # ---------------------

            # Select a random batch of images
            chosen = np.random.randint(0, X_train.shape[0], batch_size)
            real_imgs = X_train[chosen]

            # Generate a batch of new images
            noise = np.random.normal(0, 1, (batch_size, self.latent_dim))
            gen_imgs = self.generator.predict(noise)

            # Train the discriminator on the real batch, then the fake one
            d_loss_real = self.discriminator.train_on_batch(real_imgs, valid)
            d_loss_fake = self.discriminator.train_on_batch(gen_imgs, fake)
            d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

            # ---------------------
            #  Train Generator
            # ---------------------

            # For the combined model we will only train the generator
            # (to have the discriminator label samples as valid)
            noise = np.random.normal(0, 1, (batch_size, self.latent_dim))
            g_loss = self.combined.train_on_batch(noise, valid)

            # Plot the progress
            print("%d [D loss: %f, acc.: %.2f%%] [G loss: %f]" %
                  (epoch, d_loss[0], 100 * d_loss[1], g_loss))

            # If at save interval => save generated image samples
            if (epoch + 1) % sample_interval == 0:
                self.sample_images(epoch)

    def sample_images(self, epoch):
        """Save a 5x5 grid of generated images to ``images/<epoch>.png``."""
        rows, cols = 5, 5
        noise = np.random.normal(0, 1, (rows * cols, self.latent_dim))
        gen_imgs = self.generator.predict(noise)

        # Rescale images 0 - 1
        gen_imgs = 0.5 * gen_imgs + 0.5

        fig, axs = plt.subplots(rows, cols)
        for cnt in range(rows * cols):
            # Row-major walk over the grid.
            i, j = divmod(cnt, cols)
            axs[i, j].imshow(gen_imgs[cnt, :, :, 0], cmap='gray')
            axs[i, j].axis('off')
        fig.savefig("images/%d.png" % epoch)
        plt.close()
# Creating model
# Input placeholder of width INPUT_DIM
input_data = Input(shape=(INPUT_DIM, ))
# Encoding of the input
encoded = Dense(ENCODING_DIM, activation='relu')(input_data)
# Reconstruction of the input
decoded = Dense(INPUT_DIM, activation='sigmoid')(encoded)

# The autoencoder represents a model of the identity function from input
# to output
autoencoder = Model(input_data, decoded)

# Encoder model
encoder = Model(input_data, encoded)

# Decoder model: the autoencoder's last layer applied to a separate
# placeholder of width ENCODING_DIM
encoded_input = Input(shape=(ENCODING_DIM, ))
decoder_layer = autoencoder.layers[-1]
decoder = Model(encoded_input, decoder_layer(encoded_input))

# Compile
autoencoder.compile(optimizer='adadelta', loss='binary_crossentropy')

# Preparing data: scale pixels to [0, 1] and flatten every image to a vector
(x_train, _), (x_test, _) = mnist.load_data()
x_train = x_train.astype('float32') / 255.
x_test = x_test.astype('float32') / 255.
x_train = x_train.reshape((len(x_train), np.prod(x_train.shape[1:])))
x_test = x_test.reshape((len(x_test), np.prod(x_test.shape[1:])))

# Train the autoencoder to reproduce its own input
autoencoder.fit(
    x_train,
    x_train,
    epochs=50,
    batch_size=256,
    shuffle=True,
    validation_data=(x_test, x_test),
    callbacks=[TensorBoard(log_dir='/tmp/autoencoder')],
)
[maxpool_0_3, maxpool_1_3, maxpool_2_3]) flatten = TimeDistributed(Flatten())(concatenated_tensor) output = Dropout(0.5)(flatten) # biLSTM Layer bilstm = Bidirectional( LSTM(units=200, return_sequences=True, recurrent_dropout=0.1))(output) # variational biLSTM) outputs = TimeDistributed(Dense(1, activation="sigmoid"))(bilstm) model = Model(inputs=inputs, outputs=outputs) opt = tf.keras.optimizers.Adam(lr=0.001) # model.compile(optimizer=opt, loss="binary_crossentropy", metrics=["binary_accuracy"], sample_weight_mode='temporal') model.compile( optimizer=opt, loss="binary_crossentropy", metrics=["binary_accuracy"], sample_weight_mode="temporal", ) print(model.summary()) # Evaluation train_test_validate( model, X_train, Y_train, X_test, Y_test, sample_weights_train, sample_weights_test, )
# Fully-connected regression network: 13 input features, nine ReLU hidden
# layers and a single linear output.
input1 = Input(shape=(13,))
dense1 = Dense(50, activation='relu')(input1)
dense2 = Dense(45, activation='relu')(dense1)
dense3 = Dense(40, activation='relu')(dense2)
dense4 = Dense(35, activation='relu')(dense3)
dense5 = Dense(30, activation='relu')(dense4)
dense6 = Dense(30, activation='relu')(dense5)
dense7 = Dense(25, activation='relu')(dense6)
dense8 = Dense(25, activation='relu')(dense7)
dense9 = Dense(25, activation='relu')(dense8)
outputs = Dense(1)(dense9)

model = Model(inputs=input1, outputs=outputs)
model.summary()

# 3. Compile and train
model.compile(loss='mse', optimizer='adam', metrics=['mae'])
model.fit(
    x_train,
    y_train,
    epochs=100,
    batch_size=7,
    validation_split=0.2,
)

# 4. Evaluate and predict
loss, mae = model.evaluate(x_test, y_test)
print("loss : ", loss)
print("mae : ", mae)

y_predict = model.predict(x_test)
#print(y_predict)

# Compute the RMSE
from sklearn.metrics import mean_squared_error


def RMSE(y_test, y_predict):
    """Return the root of the mean squared error of the two arguments."""
    # sqrt takes the square root of the MSE
    return np.sqrt(mean_squared_error(y_test, y_predict))


print("RMSE :", RMSE(y_test, y_predict))
kernel_initializer = RandomNormal(mean = 0, stddev = 0.01), bias_initializer = RandomNormal(mean = 0.5, stddev = 0.01))) #call the convnet Sequential model on each of the input tensors so params will be shared encoded_A = conv_net(input_A) encoded_B = conv_net(input_B) #layer to merge two encoded inputs with the l1 distance between them L1_layer = Lambda(lambda tensors:K.backend.abs(tensors[0] - tensors[1])) L1_distance = L1_layer([encoded_A, encoded_B]) prediction = Dense(units = 1, activation = 'sigmoid', bias_initializer = RandomNormal(mean = 0.5, stddev = 0.01))(L1_distance) siamese_net = Model(inputs = [input_A, input_B], outputs = prediction) optimizer = Adam(0.001) siamese_net.compile(loss = "binary_crossentropy", optimizer = optimizer) siamese_net.count_params() print('Model Building Finished') def get_pair_train_data(label_list, img_list, replace = False): labels = np.random.choice(nb_class, size = 2, replace = replace) label_pair = [ label_list[labels[0]], label_list[labels[1]], ] img_pair = [ img_list[labels[0]], img_list[labels[1]], ] return label_pair, img_pair print('Training Loop Started') n_iter = 10
# Last decoder stage: concatenate the upsampled features with the network
# input along the channel axis, then convolve down to 3 output channels.
concate1 = layers.concatenate([upsample1, input_tensor], axis=-1)
dec_conv1a = layers.Conv2D(
    64, 3, padding='same', activation='relu',
)(concate1)
dec_conv1b = layers.Conv2D(
    32, 3, padding='same', activation='relu',
)(dec_conv1a)
## MISSING LAYER
dec_conv1c = layers.Conv2D(3, 3, padding='same')(dec_conv1b)
last_layer = layers.LeakyReLU(alpha=0.1)(dec_conv1c)

model = Model(input_tensor, last_layer)
print(model.summary())

# Using adam as specified in the paper, beta1 = 0.9, beta2 = 0.99, e = 10^-8
opt = optimizers.Adam(lr=1e-4, beta_1=0.9, beta_2=0.99)
model.compile(
    loss='mean_squared_error',
    optimizer=opt,
    metrics=['mean_squared_error'],
)
history = model.fit(
    train_x,
    train_y,
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(val_x, val_y),
    shuffle=True,
)

# prepare loss acc plots
mse = history.history['mean_squared_error']  # use acc instead
val_mse = history.history['val_mean_squared_error']
loss = history.history['loss']
val_loss = history.history['val_loss']
# From here on `epochs` is the 1-based epoch axis, no longer the epoch count.
epochs = range(1, len(mse) + 1)
headModel = baseModel.output headModel = AveragePooling2D(pool_size=(7, 7))(headModel) headModel = Flatten(name="flatten")(headModel) headModel = Dense(128, activation="relu")(headModel) headModel = Dropout(0.5)(headModel) headModel = Dense(2, activation="softmax")(headModel) model = Model(inputs=baseModel.input, outputs=headModel) for layer in baseModel.layers: layer.trainable = False print("compiling model...") opt = Adam(lr=INIT_LR, decay=INIT_LR / EPOCHS) model.compile(loss="binary_crossentropy", optimizer=opt, metrics=["accuracy"]) print("training head...") H = model.fit(aug.flow(trainX, trainY, batch_size=BS), steps_per_epoch=len(trainX) // BS, validation_data=(testX, testY), validation_steps=len(testX) // BS, epochs=EPOCHS) print("evaluating network...") predIdxs = model.predict(testX, batch_size=BS) predIdxs = np.argmax(predIdxs, axis=1) print( classification_report(testY.argmax(axis=1),
def emd_loss(y_true, y_pred):
    #y_true = K.cast(y_true, y_pred.dtype)
    """Fit a symmetrised EMD model and return the spread of its relative error.

    Despite the loss-style signature, the body loads a saved model from
    ``Best/3.h5``, wraps it so that the outputs for (input1, input2) and
    (input2, input1) are averaged, fits that wrapper and returns
    ``np.std`` of the relative difference between the validation
    predictions and ``y_val`` (restricted to entries where ``y_val > 0``).

    Input comes in in 8x8 looking like this:
    arrange8x8 = np.array([
        28,29,30,31,0,4,8,12,
        24,25,26,27,1,5,9,13,
        20,21,22,23,2,6,10,14,
        16,17,18,19,3,7,11,15,
        47,43,39,35,35,34,33,32,
        46,42,38,34,39,38,37,36,
        45,41,37,33,43,42,41,40,
        44,40,36,32,47,46,45,44])
    Remapping using array from telescope.py

    NOTE(review): the following names are used but not defined inside this
    function, so they must come from module scope (or the function is
    unfinished): ``y``, ``arrange443``, ``current_directory``, ``X2_train``,
    ``X2_val``, ``y_train``, ``y_val``, ``num_epochs``, ``callbacks``.

    Args:
        y_true: Used for the debug shape print and the 8x8 remap only.
        y_pred: Used for the 8x8 remap only.

    Returns:
        Standard deviation of the relative validation error.
    """
    # NOTE(review): .numpy() presumably requires eager execution - confirm
    # this is never called on symbolic tensors.
    print(tf.shape(y_true).numpy())
    remap_8x8 = [
        4, 12, 20, 28, 5, 13, 21, 29, 6, 14, 22, 30, 7, 15, 23, 31, 24, 25, 26,
        27, 16, 17, 18, 19, 8, 9, 10, 11, 0, 1, 2, 3, 59, 51, 43, 35, 58, 50,
        42, 34, 57, 49, 41, 33, 56, 48, 40, 32
    ]
    # 64x48 selection matrix: column i has a single 1 in row remap_8x8[i].
    remap_8x8_matrix = np.zeros(48 * 64, dtype=np.float32).reshape((64, 48))
    for i in range(48):
        remap_8x8_matrix[remap_8x8[i], i] = 1
    #y_true=K.reshape((y_true,(-1,64)),remap_8x8_matrix)
    #y_pred=K.reshape((y_pred,(-1,64)),remap_8x8_matrix)
    # NOTE(review): a float32 matrix is used as an index here; this looks like
    # it was meant to be a matrix product - confirm. Both results are
    # overwritten two statements below.
    y_pred_443 = (y_pred)[:, remap_8x8_matrix].reshape(-1, 8, 8, 1)
    y_true_443 = (y_true)[:, remap_8x8_matrix].reshape(-1, 8, 8, 1)
    #CNN EMD Reshape using arrange 443
    # NOTE(review): both lines index `y` (not y_pred / y_true), so the two
    # arrays are built from the same source - verify intent.
    y_pred_443 = (y)[:, arrange443].reshape(-1, 4, 4, 3)
    y_true_443 = (y)[:, arrange443].reshape(-1, 4, 4, 3)
    X1_train = y_pred_443
    X1_val = y_true_443
    #Loading EMD Models with num_model.h5 [1,8]
    model_directory = os.path.join(current_directory, r'Best/3.h5')
    print(model_directory)
    input1 = Input(shape=(
        4,
        4,
        3,
    ), name='input_1')
    input2 = Input(shape=(
        4,
        4,
        3,
    ), name='input_2')
    # `x` and this first `output` are not used afterwards: `output` is
    # reassigned below and the model itself comes from load_model().
    x = Concatenate(name='concat')([input1, input2])
    output = Dense(1, name='output')(x)
    model = load_model(model_directory)
    model.summary()
    # make a model that enforces the symmetry of the EMD function by averaging the outputs for swapped inputs
    output = Average(name='average')(
        [model((input1, input2)),
         model((input2, input1))])
    sym_model = Model(inputs=[input1, input2], outputs=output, name='sym_model')
    sym_model.summary()
    sym_model.compile(optimizer='adam', loss='msle', metrics=['mse', 'mae', 'mape', 'msle'])
    history = sym_model.fit((X1_train, X2_train), y_train, validation_data=((X1_val, X2_val), y_val), 
                            epochs=num_epochs, verbose=1, batch_size=32, callbacks=callbacks)
    y_val_preds = sym_model.predict((X1_val, X2_val))
    # The next expression is evaluated and discarded; the same value is
    # computed again and kept as rel_diff right after it.
    (y_val_preds[y_val > 0].flatten() - y_val[y_val > 0].flatten()) / y_val[y_val > 0].flatten()
    # Relative error over the entries with a strictly positive target.
    rel_diff = (y_val_preds[y_val > 0].flatten() - y_val[y_val > 0].flatten()) / y_val[y_val > 0].flatten()
    return (np.std(rel_diff))
# placing the head FC model on top of the base model - this will become # the actual model we will train model = Model(inputs=baseModel.input, outputs=headModel) # looping over all layers in the base model and freeze them so they will # NOT be updated during the first training process for layer in baseModel.layers: layer.trainable = False # compiling our model (this needs to be done after our setting our # layers to being non-trainable print("[INFO] compiling model...") opt = SGD(lr=config.MIN_LR, momentum=0.9) model.compile(loss="categorical_crossentropy", optimizer=opt, metrics=["accuracy"]) # checking to see if we are attempting to find an optimal learning rate # before training for the full number of epochs if args["lr_find"] > 0: # initializing the learning rate finder and then train with learning # rates ranging from 1e-10 to 1e+1 print("[INFO] finding learning rate...") lrf = LearningRateFinder(model) lrf.find(aug.flow(trainX, trainY, batch_size=config.BATCH_SIZE), 1e-10, 1e+1, stepsPerEpoch=np.ceil( (trainX.shape[0] / float(config.BATCH_SIZE))), epochs=20,
# Run the attention layer over the encoder outputs and the decoder outputs.
attn_out, attn_states = attn_layer([encoder_outputs, decoder_outputs])

# Join the decoder LSTM output with the attention output on the last axis.
concat_layer = Concatenate(axis=-1, name='concat_layer')
decoder_concat_input = concat_layer([decoder_outputs, attn_out])

# Per-timestep softmax over the y_voc output classes.
decoder_dense = TimeDistributed(Dense(y_voc, activation='softmax'))
decoder_outputs = decoder_dense(decoder_concat_input)

# Assemble and compile the model.
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
model.summary()
model.compile(loss='sparse_categorical_crossentropy', optimizer='rmsprop')

# Stop once val_loss has not improved for 2 consecutive epochs.
es = EarlyStopping(monitor='val_loss', patience=2, mode='min', verbose=1)

start_train = time.time()
# Teacher forcing: the decoder is fed every token but the last, and is
# trained against every token but the first (with a trailing unit axis).
decoder_targets_tr = y_tr.reshape(y_tr.shape[0], y_tr.shape[1], 1)[:, 1:]
decoder_targets_val = y_val.reshape(y_val.shape[0], y_val.shape[1], 1)[:, 1:]
history = model.fit(
    [x_tr, y_tr[:, :-1]],
    decoder_targets_tr,
    epochs=50,
    callbacks=[es],
    batch_size=512,
    validation_data=([x_val, y_val[:, :-1]], decoder_targets_val),
)
stop_train = time.time()
print("Time for training: ", stop_train - start_train)
min_delta = 0, patience = 5, verbose = 1, restore_best_weights = True)
# NOTE(review): the line above is the tail of a call opened before this
# point - presumably the callback referenced as `earlystop` below; confirm.

# Collect the callbacks into a single list for training.
callbacks = [earlystop, checkpoint]

# In[11]:

# Part 3 - Training the CNN

# Compiling the CNN
model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train on train_generator and validate on validation_generator after each
# epoch; steps are the sample counts floor-divided by 32.
# NOTE(review): 32 is presumably the generators' batch size - confirm.
# NOTE(review): fit_generator / evaluate_generator are deprecated in recent
# TensorFlow releases in favour of fit / evaluate.
history=model.fit_generator(
    train_generator,
    steps_per_epoch = train_generator.samples // 32,
    validation_data = validation_generator,
    validation_steps = validation_generator.samples // 32,
    callbacks=callbacks,
    epochs = 10)

# Loss and accuracy measured on the training generator (not the validation one).
loss_tr, acc_tr = model.evaluate_generator(train_generator)
print(loss_tr)
print(acc_tr)
def create_darknet53(IMG_SIZE, num_categories=4):
    """Build and compile a Darknet-53 style classifier.

    The network is a stem followed by five groups of residual units
    (1, 2, 8, 8 and 4 units). Every residual unit adds the output of
    ``Block1`` back onto its input. ``Block1`` and ``Block2`` are defined
    elsewhere in this file; presumably their arguments are
    (tensor, kernel size(s), stride(s), filter count(s)) - verify against
    their definitions.

    Args:
        IMG_SIZE: Height and width of the square, 3-channel input image.
        num_categories: Number of output classes; must be at least 2.
            Exactly 2 compiles with ``binary_crossentropy``, anything larger
            with ``sparse_categorical_crossentropy``.

    Returns:
        The compiled Keras ``Model`` (optimizer ``adam``, metric ``accuracy``).

    Raises:
        ValueError: If ``num_categories`` is smaller than 2. Previously this
            surfaced as an ``UnboundLocalError`` on ``loss`` only after the
            whole graph had been built.
    """
    # Validate first so a bad argument fails fast, before any layer is created.
    if num_categories < 2:
        raise ValueError(
            "num_categories must be >= 2, got {!r}".format(num_categories))
    if num_categories == 2:
        loss = 'binary_crossentropy'
    else:
        loss = 'sparse_categorical_crossentropy'

    inputs = Input(shape=(IMG_SIZE, IMG_SIZE, 3))

    # Stem plus the single residual unit of the first group.
    x = Block1(inputs, [3, 3], [1, 2], [32, 64])
    x1 = Block1(x, [1, 3], [1, 1], [32, 64])
    x = Add()([x, x1])

    # Remaining groups: a Block2 transition to `filters` channels, followed by
    # `repeats` residual units. Layers are created in the same order as the
    # original unrolled code, so auto-generated layer names are unchanged.
    for filters, repeats in ((128, 2), (256, 8), (512, 8), (1024, 4)):
        x = Block2(x, 3, 2, filters)
        for _ in range(repeats):
            x1 = Block1(x, [1, 3], [1, 1], [filters // 2, filters])
            x = Add()([x, x1])

    # Classification head.
    x = AveragePooling2D()(x)
    x = Flatten()(x)
    outputs = Dense(num_categories, activation='softmax')(x)

    model = Model(inputs, outputs)
    model.compile(optimizer='adam', loss=loss, metrics=['accuracy'])
    return model
def _build(self):
    """Build the trainable MDN-RNN and its stateful prediction counterpart.

    Both models share the same LSTM and Dense (MDN) layers, so training the
    first one also updates the weights used by the second.

    The Dense output has ``GAUSSIAN_MIXTURES * 3 * Z_DIM + 1`` channels: the
    first ``3 * GAUSSIAN_MIXTURES * Z_DIM`` are the mixture parameters and
    the last one is the reward logit.

    Returns:
        Tuple ``(model, forward)``:
        ``model`` maps a sequence of ``Z_DIM + ACTION_DIM + 1`` features to
        the MDN output and is compiled with ``rnn_loss``;
        ``forward`` additionally takes the initial hidden and cell state and
        returns ``[mdn_output, state_h, state_c]`` (not compiled).
    """
    #### THE MODEL THAT WILL BE TRAINED
    rnn_x = Input(shape=(None, Z_DIM + ACTION_DIM + 1))
    lstm = LSTM(HIDDEN_UNITS, return_sequences=True, return_state=True)
    mdn = Dense(GAUSSIAN_MIXTURES * (3 * Z_DIM) + 1)

    lstm_output_model, _, _ = lstm(rnn_x)
    mdn_model = mdn(lstm_output_model)
    model = Model(rnn_x, mdn_model)

    #### THE MODEL USED DURING PREDICTION
    # Same layers, but the LSTM state is fed in and handed back explicitly.
    state_input_h = Input(shape=(HIDDEN_UNITS, ))
    state_input_c = Input(shape=(HIDDEN_UNITS, ))
    lstm_output_forward, state_h, state_c = lstm(
        rnn_x, initial_state=[state_input_h, state_input_c])
    mdn_forward = mdn(lstm_output_forward)
    forward = Model([rnn_x, state_input_h, state_input_c],
                    [mdn_forward, state_h, state_c])

    #### LOSS FUNCTION
    def rnn_z_loss(y_true, y_pred):
        """Mean negative log-likelihood of z under the predicted mixture."""
        z_true, _ = self.get_responses(y_true)
        d = GAUSSIAN_MIXTURES * Z_DIM
        # Drop the trailing reward channel; keep only the mixture parameters.
        z_pred = y_pred[:, :, :(3 * d)]
        z_pred = K.reshape(z_pred, [-1, GAUSSIAN_MIXTURES * 3])
        log_pi, mu, log_sigma = self.get_mixture_coef(z_pred)
        flat_z_true = K.reshape(z_true, [-1, 1])
        # Log of (mixture weight * component density), summed over components
        # below.
        z_loss = log_pi + self.tf_lognormal(flat_z_true, mu, log_sigma)
        z_loss = -K.log(K.sum(K.exp(z_loss), 1, keepdims=True))
        return K.mean(z_loss)

    def rnn_rew_loss(y_true, y_pred):
        """Mean binary cross-entropy between the reward target and logit."""
        _, rew_true = self.get_responses(y_true)
        # The last output channel is the reward logit.
        reward_pred = y_pred[:, :, -1]
        rew_loss = K.binary_crossentropy(rew_true, reward_pred, from_logits=True)
        return K.mean(rew_loss)

    def rnn_loss(y_true, y_pred):
        """Weighted sum of the z loss and the reward loss."""
        z_loss = rnn_z_loss(y_true, y_pred)
        rew_loss = rnn_rew_loss(y_true, y_pred)
        return Z_FACTOR * z_loss + REWARD_FACTOR * rew_loss

    opti = Adam(lr=LEARNING_RATE)
    model.compile(loss=rnn_loss, optimizer=opti, metrics=[rnn_z_loss, rnn_rew_loss])
    return (model, forward)
import sys
import time

import tqdm
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

from flowket.optimization import loss_for_energy_minimization
from flowket.machines import ConvNetAutoregressive2D
from flowket.samplers import AutoregressiveSampler, FastAutoregressiveSampler

# 10x10 int8 input fed to the autoregressive convolutional network.
inputs = Input(shape=(10, 10), dtype='int8')
convnet = ConvNetAutoregressive2D(
    inputs,
    depth=40,
    num_of_channels=32,
    weights_normalization=False,
)
predictions = convnet.predictions
conditional_log_probs = convnet.conditional_log_probs

# Two models over the same input: one for optimisation, one for the sampler.
model = Model(inputs=inputs, outputs=predictions)
conditional_log_probs_model = Model(inputs=inputs, outputs=conditional_log_probs)

optimizer = Adam(lr=0.001, beta_1=0.9, beta_2=0.999)
model.compile(loss=loss_for_energy_minimization, optimizer=optimizer)

# The sampler draws from the conditional log-probabilities, 1024 at a time.
sampler = FastAutoregressiveSampler(conditional_log_probs_model, batch_size=1024)
y.append(y_temp) # Since we need an N x T x D input X = np.array(X).reshape(-1, T, D) y = np.array(y) print(X.shape) print(y.shape) N, T, D = X.shape i_layer = Input(shape = (T, D)) h_layer = SimpleRNN(10)(i_layer) o_layer = Dense(1)(h_layer) model = Model(i_layer, o_layer) model.compile(loss = 'mse', optimizer = Adam(lr = 0.1)) index = -N//4 report = model.fit(X[:index], y[:index], epochs=50, validation_data=(X[index:], y[index:])) plt.plot(report.history['loss'], label='training_loss') plt.plot(report.history['val_loss'], label='validation_loss') plt.legend() y_test = y[index:] y_pred = [] X_end = X[index] while len(y_pred) < len(y_test): pred = model.predict(X_end.reshape(1, -1))[0,0] y_pred.append(pred)
# This script builds and trains a simple feed-forward neural network using the
# Keras API of TensorFlow.
data = read_data('train')
y = data["Survived"].to_numpy()
X = data.drop(columns="Survived").to_numpy()

# Definition of the network's hyper-parameters.
# The feature count is taken from the array shape so it does not depend on the
# data set having at least one row.
input_dim = X.shape[1]
epochs = 100
hidden_sizes = [10, 20, 10]

# Definition of the model: one Dense(sigmoid) + Dropout(0.1) pair per entry of
# hidden_sizes, followed by a single sigmoid output unit.
input_placeholder = Input(shape=(input_dim, ))
layer = input_placeholder
# Iterate without consuming the list, so hidden_sizes still describes the
# architecture after the model has been built (the previous pop(0) loop left
# it empty).
for dim in hidden_sizes:
    layer = Dense(dim, activation='sigmoid')(layer)
    layer = Dropout(0.1)(layer)
output = Dense(1, activation='sigmoid')(layer)
model = Model(input_placeholder, output)

# Compile model
opt = Adam(lr=1e-2, decay=1e-2 / epochs)
model.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])

# Training of the model
model.fit(X, y, epochs=epochs, batch_size=20, shuffle=True)

# Save the model
model.save("Models/FeedForward.hdf5")
class Agent():
    """Agent object which initializes and trains the Keras DQN model."""

    def __init__(self,
                 actions,
                 height=80,
                 width=80,
                 channels=1,
                 discount=0.95,
                 loss="huber",
                 env="Breakout-v0",
                 model_dir=None):
        """Initializes the parameters of the model.

        Args:
            actions: Number of discrete actions (size of the Q-value output).
            height: Height of the image.
            width: Width of the image.
            channels: Number of channels, history of past frames.
            discount: Discount factor for the Q-learning update.
            loss: Loss passed to ``compile``; the string "huber" is replaced
                by ``huber_loss`` in ``create_model``.
            env: Environment name; only stored on the instance.
            model_dir: If not None, a TensorBoard callback logging to this
                directory is created.
        """
        self.height = height
        self.width = width
        self.channels = channels
        self.discount = discount
        self.actions = actions
        self.env = env
        self.loss = loss
        self.epoch_num = 0
        self.model_dir = model_dir
        self.max_reward = 0
        self.cur_reward = 0
        # Backend variable exposed through the custom `reward` metric.
        self.reward_tensor = K.variable(value=0)
        if model_dir is not None:
            self.tbCallBack = TensorBoard(
                log_dir=model_dir,
                histogram_freq=0,
                write_graph=True,
                write_images=True)

    def create_model(
            self,
            lr,
            type="vanilla",
            rescale_value=255.0,
    ):
        """Builds the DQN Agent architecture.

        Source:https://cs.corp.google.com/piper///depot/google3/third_party/py/
        dopamine/agents/dqn/dqn_agent.py?q=DQN&dr=CSs&l=15

        This initializes the model as per the specifications mentioned in
        the DQN paper by Deepmind. The network is built with the tf.keras
        functional API: input rescaling, three convolutions, one hidden
        dense layer and a linear Q-value output.

        Args:
            lr: Learning rate for the Adam optimizer.
            type: Accepted for interface compatibility; not used here.
            rescale_value: Input frames are divided by this value.

        Returns:
            Model: Compiled model (also stored as ``self.model``).
        """
        self.image_frames = Input(shape=(self.height, self.width, self.channels))
        rescaled = Lambda(lambda frames: frames / float(rescale_value))(
            self.image_frames)
        self.conv1 = Conv2D(
            filters=32,
            kernel_size=(8, 8),
            strides=(4, 4),
            activation="relu",
            name="conv1")(rescaled)
        self.conv2 = Conv2D(
            filters=64,
            kernel_size=(4, 4),
            strides=(2, 2),
            activation="relu",
            name="conv2")(self.conv1)
        self.conv3 = Conv2D(
            filters=64,
            kernel_size=(3, 3),
            strides=(1, 1),
            activation="relu",
            name="conv3")(self.conv2)
        self.flattened = Flatten(name="flattened")(self.conv3)
        self.fully_connected_1 = Dense(
            units=512, activation="relu", name="fully_connected_1")(self.flattened)
        self.q_values = Dense(
            units=self.actions, activation="linear", name="q_values")(
                self.fully_connected_1)

        self.model = Model(inputs=[self.image_frames], outputs=[self.q_values])
        self.optimizer = Adam(lr=lr)
        if self.loss == "huber":
            self.loss = huber_loss

        K.get_session().run(tf.global_variables_initializer())

        def reward(y_true, y_pred):
            # Metric that ignores its inputs and reports the current reward
            # variable, so it is logged alongside the other metrics.
            return self.reward_tensor

        self.model.compile(
            optimizer=self.optimizer, loss=self.loss, metrics=["mse", reward])
        return self.model

    def batch_train(self,
                    curr_state,
                    next_state,
                    immediate_reward,
                    action,
                    done,
                    target,
                    type="Double"):
        """Computes the TD target for a batch of transitions and fits once.

        Here, we randomly sample episodes from the Experience buffer and use
        this to train our model. This method computes the target for a batch
        and trains the model for a single epoch.

        Args:
            curr_state(array): Numpy array representing an array of current
                states of the game.
            next_state(array): Numpy array for the immediate next states.
            immediate_reward(array): Rewards for the given transitions.
            action(array): Actions taken to go from current to next state.
            done(array): Terminal flags; used as ``1 - done``, so presumably
                0/1 valued.
            target(keras.model object): Target network for computing TD error.
            type: "Double" (online network picks the action, target network
                scores it) or "Vanilla" (max over the target network).

        Raises:
            ValueError: If ``type`` is neither "Double" nor "Vanilla".
                Previously any other value (including the lowercase
                "vanilla" used as default in ``create_model``) failed later
                with an ``UnboundLocalError``.
        """
        # Checked before any prediction so a bad mode fails immediately.
        if type not in ("Double", "Vanilla"):
            raise ValueError(
                'type must be "Double" or "Vanilla", got {!r}'.format(type))

        if type == "Double":
            forward_action = np.argmax(self.model.predict(next_state), axis=1)
            predicted_qvalue = target.predict(next_state)  # BxN matrix
            B = forward_action.size
            forward_qvalue = predicted_qvalue[np.arange(B), forward_action]  # Bx1 vec
        else:
            forward_qvalue = np.max(target.predict(next_state), axis=1)

        # No future value is added for terminal transitions.
        discounted_reward = (self.discount * forward_qvalue * (1 - done))
        Q_value = immediate_reward + discounted_reward

        # Start from the current predictions and overwrite only the entry of
        # the action actually taken, so the other actions contribute no error.
        target_values = self.model.predict(curr_state)
        target_values[range(target_values.shape[0]), action] = Q_value

        callbacks = []
        # Publish the latest reward for the `reward` metric.
        K.set_value(self.reward_tensor, self.cur_reward)
        if self.model_dir is not None and self.epoch_num % TB_LOGGING_EPOCHS == 0:
            callbacks.append(self.tbCallBack)
        # initial_epoch / epochs make this exactly one epoch while keeping the
        # epoch number increasing across calls (used by TensorBoard).
        self.model.fit(
            curr_state,
            target_values,
            verbose=0,
            initial_epoch=self.epoch_num,
            callbacks=callbacks,
            epochs=self.epoch_num + 1)
        self.epoch_num += 1

    def predict_action(self, state):
        """Predict the action for a given state.

        Args:
            state(float): Numpy array; a 3-D state gets a leading batch axis.

        Return:
            action(int): Index of the largest predicted Q-value.
        """
        if np.ndim(state) == 3:
            state = np.expand_dims(state, axis=0)
        return np.argmax(self.model.predict(state))

    def play(self, env, directory, mode):
        """Plays one episode greedily and returns its total reward.

        Args:
            env: Environment offering ``reset``, ``step`` and ``render``.
            directory: Not used by this method.
            mode: Any value other than "Train" records rendered frames and,
                on a new best reward, writes them to ``~/breakout.gif``.

        Returns:
            Total reward collected during the episode.
        """
        import os  # local import: only needed to resolve the GIF path

        steps = []
        state = env.reset()
        done = False
        tot_reward = 0
        actions = [0] * self.actions
        while not done:
            if mode != "Train":
                s = env.render("rgb_array")
                steps.append(s)

            action = self.predict_action(state)
            actions[action] += 1
            state, reward, done, _ = env.step(action)
            tot_reward += reward

        self.cur_reward = tot_reward
        if mode != "Train" and tot_reward > self.max_reward:
            print("New high reward: ", tot_reward)
            clip = ImageSequenceClip(steps, fps=30)
            # "~" is not expanded automatically when a path is opened, so it
            # is resolved to the user's home directory explicitly.
            clip.write_gif(os.path.expanduser("~/breakout.gif"), fps=30)
            self.max_reward = tot_reward

        print("ACTIONS TAKEN", actions)
        return tot_reward