# Persist the class names of the training generator, one per line, so the
# index -> label mapping can be recovered later.
class_names = list(generator_train.class_indices.keys())
with open(TRAINING_TIME_PATH + '/class_names.txt', 'w') as filehandle:
    for listitem in class_names:
        filehandle.write('%s\n' % listitem)

# Build the fine-tuning model on top of the pre-trained base.
finetune_model = finetune_inceptionv3(
    base_model, transfer_layer, TRAIN_LAYERS,
    dropout=DROPOUT, fc_layers=FC_LAYERS,
    num_classes=generator_train.num_classes,
    new_weights=NEW_WEIGHTS)
# load weights from last best training by new weights

# Compile with Adam at the configured learning rate.
optimizer = Adam(lr=HYPERPARAMS['LEARN_RATE'])
finetune_model.compile(optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

# Balanced per-class weights derived from the label distribution of the
# training generator.
# NOTE(review): compute_class_weight returns an array; Keras `fit` expects a
# {class_index: weight} dict -- confirm the conversion happens where this is used.
class_weight = compute_class_weight(class_weight='balanced',
                                    classes=np.unique(generator_train.classes),
                                    y=generator_train.classes)

# CHECKPOINTS
# Keep only the best weights seen so far, judged by validation accuracy.
# NOTE(review): with metrics=['accuracy'] newer Keras versions log this metric
# as 'val_accuracy', not 'val_acc' -- confirm against the installed version.
NEW_MODEL_PATH_STRUCTURE = TRAINING_TIME_PATH + '/weights.best.hdf5'
checkpoint = ModelCheckpoint(NEW_MODEL_PATH_STRUCTURE, monitor='val_acc', verbose=1, save_best_only=True, mode='max')
# Output location for the aggregated scores; create the result folder if needed.
scores_log = folder + "/{}".format('scores')
if not os.path.exists(folder):
    os.makedirs(folder)
''' repeated trials ---------------------------------------------------------------- '''
final_scores = []
trials = 10
# model definition, training, logging
for i in range(trials):
    # A fresh optimizer and model are created for every trial.
    optimizer = Adam(lr=0.001)
    model = create_model()
    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
    ''' creating custom dirs '''
    model_name = str(layer_num) + '_model'
    # path for this fold's specific stats
    trial_path = folder + "/{}".format('fold' + str(i))
    # log performance/history
    perf_log = trial_path + "/{}".format(filename + '_perf_log' + str(i))
input_shape=(IMAGE_SIZE[0], IMAGE_SIZE[1], 3))
# (The line above closes a constructor call that opens outside this view.)

# With include_top=False the base network has no classifier head, so add our
# own output layers.
x = net.output
x = Flatten()(x)
x = Dropout(0.5)(x)
x = Dense(NUM_CLASSES, activation='softmax', name='predictions')(x)

# Fine-tuning: freeze the first FREEZE_LAYERS layers and train the rest.
net_final = Model(inputs=net.input, outputs=x)
for layer in net_final.layers[:FREEZE_LAYERS]:
    layer.trainable = False
for layer in net_final.layers[FREEZE_LAYERS:]:
    layer.trainable = True
net_final.compile(optimizer=Adam(lr=0.0001), loss='categorical_crossentropy', metrics=['accuracy'])
# print(net_final.summary())

# Train from the batch generators; step counts are the number of whole
# batches available in each generator.
net_final.fit_generator(train_batches,
                        steps_per_epoch=train_batches.samples // BATCH_SIZE,
                        validation_data=valid_batches,
                        validation_steps=valid_batches.samples // BATCH_SIZE,
                        epochs=NUM_EPOCHS,
                        callbacks=[tensorboard])

# Save the trained model to an HDF5 file.
net_final.save('HW02_05_ResNet50.h5')
output_dim=embedding_size, input_length=max_tokens, name="layer_embedding"))
# (The line above closes an embedding-layer `model.add(...)` call that opens
# outside this view.)

# Recurrent units
# The first two GRU layers return the full sequence so the next GRU can
# consume it.
model.add(GRU(units=16, return_sequences=True) )  # LSTMs might be worse as they have redundant gates
model.add(GRU(units=8, return_sequences=True))
model.add(GRU(units=4))  # this one outputs only the final output
# fully connected layer
# A single sigmoid unit: binary classification.
model.add(Dense(1, activation='sigmoid'))
#------------------------------------------------------------------------------
optimizer = Adam(lr=1e-3)
model.compile(loss="binary_crossentropy", optimizer=optimizer, metrics=["accuracy"])
model.summary()

# training of the model
# Wall-clock timing around the fit call; 5% of the training data is held out
# for validation.
start = timeit.default_timer()
model.fit(x_train_pad, y_train, validation_split=0.05, epochs=3, batch_size=64)
stop = timeit.default_timer()
# Bare expression: shows the elapsed seconds only in an interactive session;
# it has no effect when run as a script.
stop - start
# Performance evaluation
#------------------------------------------------------------------------------
# Training hyper-parameters.
EPOCHS = 20
INIT_LR = 1e-4
BS = 32

# One categorical cross-entropy loss per named model output: five digit
# heads plus the sequence-length head.
losses = {
    "digit1": "categorical_crossentropy",
    "digit2": "categorical_crossentropy",
    "digit3": "categorical_crossentropy",
    "digit4": "categorical_crossentropy",
    "digit5": "categorical_crossentropy",
    "length": "categorical_crossentropy",
}
#model = build_vgg_custom()
model = build_vgg_random()
# Adam with a decay term of INIT_LR / EPOCHS.
opt = Adam(lr=INIT_LR, decay=INIT_LR / EPOCHS)
model.compile(optimizer=opt, loss=losses, metrics=["accuracy"])
print(model.summary())
callback = [
    # NOTE(review): patience equals EPOCHS, so if training runs for EPOCHS
    # epochs this callback can never stop it early -- confirm intent.
    EarlyStopping(monitor='val_loss', patience=EPOCHS),
    # Keep only the weights with the lowest validation loss.
    ModelCheckpoint(filepath='best_vgg_random_model.h5', monitor='val_loss', save_best_only=True)
]
# Targets are passed per output name; column k of train_labels feeds the
# head for digit k+1. (The call continues outside this view.)
history = model.fit(train_images, {
    "digit1": train_labels[:, 0, :],
    "digit2": train_labels[:, 1, :],
    "digit3": train_labels[:, 2, :],
    "digit4": train_labels[:, 3, :],
# 2. we add a DropOut layer followed by a Dense (fully connected) # layer which generates softmax class score for each class # 3. we compile the final model using an Adam optimizer, with a # low learning rate (since we are 'fine-tuning') net = ResNet50(include_top=False, weights='imagenet', input_tensor=None, input_shape=(IMAGE_SIZE[0],IMAGE_SIZE[1],3)) x = net.output x = Flatten()(x) x = Dropout(0.5)(x) output_layer = Dense(NUM_CLASSES, activation='softmax', name='softmax')(x) net_final = Model(inputs=net.input, outputs=output_layer) for layer in net_final.layers[:FREEZE_LAYERS]: layer.trainable = False for layer in net_final.layers[FREEZE_LAYERS:]: layer.trainable = True net_final.compile(optimizer=Adam(lr=1e-5), loss='categorical_crossentropy', metrics=['accuracy']) print(net_final.summary()) # train the model history = net_final.fit_generator(train_batches, steps_per_epoch = train_batches.samples // BATCH_SIZE, validation_data = valid_batches, validation_steps = valid_batches.samples // BATCH_SIZE, epochs = NUM_EPOCHS) # save trained weights net_final.save(WEIGHTS_FINAL) # 繪出訓練過程準確度變化 plt.plot(history.history['acc'])
# NOTE(review): this snippet is Python 2 (see the `print` statement in the
# training loop below).
print('Evaluate time: %.3f s' % (time.time() - start_time))
sys.stdout.flush()

# Create Model
# Attach a new sigmoid head to the output of model1's second-to-last layer.
fc = Dense(NUM_CLASSES, activation='sigmoid', name='fc')(model1.layers[-2].output)
model3 = Model(inputs=model1.input, outputs=fc)

# Freeze every layer that comes before 'res5a_branch2a'; that layer and all
# later ones are trainable.
layerFlag = False
for layer in model1.layers:
    if layer.name == 'res5a_branch2a':
        layerFlag = True
    layer.trainable = layerFlag
#model3.summary(line_length=100)
model3.compile(optimizer=Adam(lr=1e-5), loss='binary_crossentropy', metrics=['accuracy'])
accuracyHistory = [None] * EPOCHS

# Train
for i in range(EPOCHS):
    epoch_time = time.time()
    accuracyEpoch = 0.0
    lossEpoch = 0.0
    # Drop labels that are not greater than -1, then one-hot encode the rest.
    # NOTE(review): Y_data is overwritten here on every epoch -- confirm it
    # is reloaded further down the loop (not visible in this view).
    Y_data_keep = (Y_data > -1)
    Y_data = Y_data[Y_data_keep]
    Y_data = one_hot.fit_transform(Y_data)
    print 'Start train epoch : ' + str(i)
    start_time = time.time()
    sys.stdout.flush()
def adr_ao(frames, actions, states, context_frames, Ec, A, D,
           learning_rate=0.01, gaussian=False, kl_weight=None, L=None,
           use_seq_len=12, lstm_units=None, lstm_layers=None, training=True,
           reconstruct_random_frame=False, random_window=True):
    """Build and compile the action-only encoder/decoder training model.

    Two context windows of ``context_frames`` frames are encoded with ``Ec``:
    one starting at a random offset and one at the start of the window. The
    action/state sequence is encoded with ``A`` and frames are decoded with
    ``D``. The model loss is ``mean(rec_loss) + mean(sim_loss)``, plus
    ``kl_weight * mean(kl_loss)`` when ``gaussian`` is set.

    Args:
        frames: Tensor whose static shape unpacks to
            ``(bs, seq_len, w, h, c)``.
        actions, states: Forwarded unchanged to ``get_ins``.
        context_frames: Number of frames in each context window.
        Ec: Callable returning ``(encoding, skips)`` for a context window.
        A: Callable encoding the action/state sequence.
        D: Callable decoding ``[hc_ha, skips]`` into frames.
        learning_rate: Learning rate for the Adam optimizer.
        gaussian: If true, add the latent branch ``L`` and the KL term.
        kl_weight: Weight of the KL term; required when ``gaussian`` is true.
        L: Callable returning ``(z, mu, logvar, state)``; required when
            ``gaussian`` is true.
        use_seq_len: Number of frames used per sequence.
        lstm_units, lstm_layers: Forwarded to ``get_ins`` as ``a_units`` /
            ``a_layers``.
        training: When exactly ``False``, ``mu`` is used in place of the
            sampled ``z``.
        reconstruct_random_frame: If true, reconstruct a single randomly
            chosen frame instead of the whole window.
        random_window: Forwarded to ``get_ins``.

    Returns:
        A compiled Keras ``Model`` with outputs
        ``[x_recovered, x_to_recover]`` (plus ``mu`` and ``logvar`` when
        ``gaussian`` is true).

    Raises:
        AssertionError: If ``seq_len < use_seq_len``.
        ValueError: If ``gaussian`` is true but ``L`` or ``kl_weight`` is
            missing.
    """
    bs, seq_len, w, h, c = [int(s) for s in frames.shape]
    assert seq_len >= use_seq_len
    # Fail early with a clear message instead of a late
    # "NoneType is not callable" / "unsupported operand" TypeError.
    if gaussian and (L is None or kl_weight is None):
        raise ValueError(
            "gaussian=True requires both `L` and `kl_weight` to be provided")

    frame_inputs, action_state, initial_state, _, ins = get_ins(
        frames, actions, states,
        use_seq_len=use_seq_len, random_window=random_window,
        gaussian=gaussian, a_units=lstm_units, a_layers=lstm_layers)

    rand_index_1 = tf.random.uniform(
        shape=(), minval=0, maxval=use_seq_len - context_frames + 1,
        dtype='int32')

    # Random xc_0, as an artificial way of augmenting the dataset
    xc_0 = tf.slice(frame_inputs, (0, rand_index_1, 0, 0, 0),
                    (-1, context_frames, -1, -1, -1))
    xc_1 = tf.slice(frame_inputs, (0, 0, 0, 0, 0),
                    (-1, context_frames, -1, -1, -1))

    x_to_recover = frame_inputs
    n_frames = use_seq_len

    # ===== Build the model
    hc_0, skips_0 = Ec(xc_0)
    hc_1, _ = Ec(xc_1)

    # Keep only the encoding of the last context frame.
    hc_0 = tf.slice(hc_0, (0, context_frames - 1, 0), (-1, 1, -1))
    hc_1 = tf.slice(hc_1, (0, context_frames - 1, 0), (-1, 1, -1))
    skips = slice_skips(skips_0, start=context_frames - 1, length=1)

    if reconstruct_random_frame:
        # Truncate the action sequence at a random step and reconstruct only
        # the frame at that step.
        action_state_len = action_state.shape[-1]
        rand_index_2 = tf.random.uniform(
            shape=(), minval=0, maxval=use_seq_len, dtype='int32')
        action_state = tf.slice(action_state, (0, 0, 0),
                                (bs, rand_index_2 + 1, action_state_len))
        x_to_recover = tf.slice(frame_inputs, (0, rand_index_2, 0, 0, 0),
                                (bs, 1, w, h, c))
        n_frames = rand_index_2 + 1
    else:
        skips = repeat_skips(skips, use_seq_len)

    ha = A(action_state)
    hc_repeat = RepeatVector(n_frames)(tf.squeeze(hc_0, axis=1))
    hc_ha = K.concatenate([hc_repeat, ha], axis=-1)

    if gaussian:
        z, mu, logvar, _ = L([hc_ha, initial_state])
        z = mu if training is False else z
        hc_ha = K.concatenate([hc_repeat, ha, z], axis=-1)

    if reconstruct_random_frame:
        # Only the last time step is decoded in this mode.
        _, hc_ha = tf.split(hc_ha, [-1, 1], axis=1)
        if gaussian:
            _, mu = tf.split(mu, [-1, 1], axis=1)
            _, logvar = tf.split(logvar, [-1, 1], axis=1)

    x_recovered = D([hc_ha, skips])

    rec_loss = mean_squared_error(x_to_recover, x_recovered)
    sim_loss = mean_squared_error(hc_0, hc_1)

    if gaussian:
        ED = Model(inputs=ins,
                   outputs=[x_recovered, x_to_recover, mu, logvar])
    else:
        ED = Model(inputs=ins, outputs=[x_recovered, x_to_recover])
    ED.add_metric(rec_loss, name='rec_loss', aggregation='mean')
    ED.add_metric(sim_loss, name='sim_loss', aggregation='mean')

    if gaussian:
        kl_loss = kl_unit_normal(mu, logvar)
        ED.add_metric(kl_loss, name='kl_loss', aggregation='mean')
        ED.add_loss(
            K.mean(rec_loss) + K.mean(sim_loss) + kl_weight * K.mean(kl_loss))
    else:
        ED.add_loss(K.mean(rec_loss) + K.mean(sim_loss))

    # `learning_rate` is the supported keyword; `lr` is a deprecated alias.
    ED.compile(optimizer=Adam(learning_rate=learning_rate))

    return ED
import numpy as np
from tensorflow.python.keras import Sequential
from tensorflow.python.keras.layers import Dense
from tensorflow.python.keras.optimizers import Adam

# Toy regression problem: learn y = x^2 - 20x + 4 from random samples.

# 1000 inputs of shape (1000, 1), drawn uniformly from [-30, 70).
x = np.random.random((1000, 1)) * 100 - 30
# Targets computed elementwise on the whole array (same shape as x) instead
# of looping over rows in Python.
y = x * x - 20 * x + 4

# Small fully-connected network: 1 -> 10 -> 20 -> 1 with ReLU hidden layers
# and a linear output.
model = Sequential()
model.add(Dense(10, input_dim=1, activation='relu'))
model.add(Dense(20, activation='relu'))
model.add(Dense(1))

learning_rate = 0.01
model.compile(loss='mse', optimizer=Adam(learning_rate))

model.fit(x=x, y=y, epochs=1050, verbose=0)

# Report the final MSE on the training data itself (no held-out set here).
model.evaluate(x=x, y=y)
def create_model(learning_rate, num_dense_layers, num_dense_nodes, activation):
    """Build and compile the CNN whose hyper-parameters are being searched.

    The network takes flattened images, reshapes them, runs two
    convolution + max-pooling stages, then a configurable stack of dense
    layers, and ends in a softmax classifier.

    Hyper-parameters:
    learning_rate:     Learning-rate for the Adam optimizer.
    num_dense_layers:  Number of dense layers after the convolutions.
    num_dense_nodes:   Number of nodes in each of those dense layers.
    activation:        Activation function used by the conv and dense layers.

    Returns the compiled Keras Sequential model.
    """
    net = Sequential()

    # Input is a flat vector of img_size_flat values; the convolutional
    # layers need it reshaped to img_shape_full.
    net.add(InputLayer(input_shape=(img_size_flat, )))
    net.add(Reshape(img_shape_full))

    # Two conv stages that differ only in filter count and name; only the
    # activation function is a tunable hyper-parameter here.
    conv_stages = ((16, 'layer_conv1'), (36, 'layer_conv2'))
    for n_filters, conv_name in conv_stages:
        net.add(Conv2D(kernel_size=5, strides=1, filters=n_filters,
                       padding='same', activation=activation,
                       name=conv_name))
        net.add(MaxPooling2D(pool_size=2, strides=2))

    # Collapse the 4-rank conv output to 2-rank for the dense layers.
    net.add(Flatten())

    # Tunable dense stack; layers are named layer_dense_1, layer_dense_2, ...
    for layer_no in range(1, num_dense_layers + 1):
        net.add(Dense(num_dense_nodes,
                      activation=activation,
                      name='layer_dense_{0}'.format(layer_no)))

    # Softmax classification head.
    net.add(Dense(num_classes, activation='softmax'))

    # Adam with the learning rate under search.
    net.compile(optimizer=Adam(lr=learning_rate),
                loss='categorical_crossentropy',
                metrics=['accuracy'])

    return net
print("DeepCellState: " + str(our_corr))
print("Improvement: " + str(our_corr / baseline_corr))
# exit()

# Leave-one-out evaluation: for each perturbation, fine-tune a fresh copy of
# the pre-trained autoencoder on all other samples and score the held-out one.
tcorr = 0
tcorrb = 0
for i in range(len(pert_ids)):
    test_input = input_data[i]
    test_output = output_data[i]

    # `model` is used here as a path prefix. The saved model is reloaded on
    # every iteration because the Keras session is cleared at the end of the
    # loop body.
    autoencoder_w = keras.models.load_model(model + "main_model/")
    # Close the weights file deterministically instead of leaking one handle
    # per iteration.
    # NOTE(review): pickle.load can execute arbitrary code; only load weight
    # files from a trusted source.
    with open(model + "MCF7" + "_decoder_weights", "rb") as weights_file:
        decoder_weights = pickle.load(weights_file)
    autoencoder_w.get_layer("decoder").set_weights(decoder_weights)

    # Training set = every sample except the held-out index i.
    input_tr = np.delete(np.asarray(input_data), i, axis=0)
    output_tr = np.delete(np.asarray(output_data), i, axis=0)

    autoencoder = deepfake.build(978, 128)
    autoencoder.set_weights(autoencoder_w.get_weights())
    # `learning_rate` is the supported keyword; `lr` is a deprecated alias.
    autoencoder.compile(loss="mse", optimizer=Adam(learning_rate=1e-5))
    autoencoder.fit(input_tr, output_tr, epochs=50, batch_size=1)

    decoded = autoencoder.predict(np.asarray([test_input]))
    corr = stats.pearsonr(decoded.flatten(), test_output.flatten())[0]
    cdata.append(corr)
    tcorr = tcorr + corr
    print(corr)

    # Needed to prevent Keras memory leak
    del autoencoder
    gc.collect()
    K.clear_session()
    tf.compat.v1.reset_default_graph()

# Mean held-out correlation over all perturbations.
tcorr = tcorr / len(pert_ids)
print("DeepCellState*: " + str(tcorr))
def build_model2(self):
    """Build the convolutional autoencoder and its encoder/decoder halves.

    Despite the ``vae`` naming there is no sampling layer or KL term here:
    the encoder maps the input to a 25-value code through six
    conv + max-pool stages and a dense layer, and the decoder reshapes that
    code to 5x5x1 and upsamples it back through shared conv layers.

    Returns:
        Tuple ``(vae, vae_encoder, vae_decoder)``: the full autoencoder
        (compiled with Adam and binary cross-entropy), the input -> code
        model, and the code -> image model. The decoder model shares its
        layers with the full autoencoder.
    """
    image_in = Input(shape=self.input_dim)

    # ---- Encoder: conv + 2x2 max-pool per stage.
    features = image_in
    for n_filters in (32, 16, 16, 16, 16, 4):
        features = Conv2D(filters=n_filters, kernel_size=3, padding='same',
                          activation='relu', trainable=True)(features)
        features = MaxPooling2D((2, 2), padding='same',
                                trainable=True)(features)

    flat = Flatten()(features)
    print("vae_z_in shape " + str(flat.shape))

    code = Dense(25, trainable=True)(flat)
    print("vae_z shape " + str(code.shape))

    # Stand-alone input used to drive the decoder on its own.
    code_in = Input(shape=(25, ))
    print("vae_z_input shape " + str(code_in.shape))

    to_grid = Reshape((5, 5, 1))
    grid = to_grid(code)
    print("vae_z_out_model shape " + str(grid.shape))

    # ---- Decoder layers, created once and reused by both graphs.
    # Spatial size after each upsampling step:
    # (5, 5) -> (15, 20) -> (30, 40) -> (60, 80) -> (120, 160) -> (240, 320)
    decoder_layers = [
        UpSampling2D((3, 4)),
        Conv2D(filters=32, kernel_size=(3, 3), padding='same',
               activation='relu'),
        UpSampling2D((2, 2)),
        Conv2D(filters=32, kernel_size=(3, 3), padding='same',
               activation='relu'),
        UpSampling2D((2, 2)),
        Conv2D(filters=16, kernel_size=(3, 3), padding='same',
               activation='relu'),
        UpSampling2D((2, 2)),
        Conv2D(filters=8, kernel_size=(3, 3), padding='same',
               activation='relu'),
        UpSampling2D((2, 2)),
        Conv2D(filters=1, kernel_size=(3, 3), padding='same',
               activation='sigmoid'),
    ]

    # Full autoencoder path: encoder code -> reconstruction.
    reconstruction = grid
    for layer in decoder_layers:
        reconstruction = layer(reconstruction)

    # Stand-alone decoder path: external code input -> reconstruction.
    # Every intermediate tensor is kept for the shape report below.
    decoder_steps = []
    tensor = to_grid(code_in)
    for layer in decoder_layers:
        tensor = layer(tensor)
        decoder_steps.append(tensor)

    # The first label reports the output of the first upsampling layer.
    shape_report = (
        ("vae_d1_decoder shape ", 0),
        ("vae_d2_decoder shape ", 1),
        ("vae_d3_decoder shape ", 3),
        ("vae_d4_decoder shape ", 5),
        ("vae_d5_decoder shape ", 7),
    )
    for label, step in shape_report:
        print(label + str(decoder_steps[step].shape))

    # Models
    vae = Model(image_in, reconstruction)
    vae_encoder = Model(image_in, code)
    vae_decoder = Model(code_in, decoder_steps[-1])

    vae.compile(optimizer=Adam(lr=0.0001), loss='binary_crossentropy')
    vae.summary()

    return (vae, vae_encoder, vae_decoder)
# %% # full_dataset = tf.data.Dataset.from_tensor_slices((x_data, y_data)) # full_dataset = full_dataset.cache().shuffle(buffer_size=BUFFER_SIZE) # train_size = int(0.7 * DATASET_SIZE) # test_size = int(0.3 * DATASET_SIZE) # train_dataset = full_dataset.take(train_size).batch(BATCH_SIZE).repeat() # test_dataset = full_dataset.skip(train_size).batch(BATCH_SIZE).repeat() model = Sequential() # model.add(LSTM(30, input_shape=(TIMESERIES_LENGTH, 3))) 单向,在此改双向 model.add(LSTM(30, input_shape=(TIMESERIES_LENGTH, 3))) model.add(Dropout(0.2)) model.add(Dense(6, activation='softmax')) model.compile(optimizer=Adam(lr=0.001), loss='categorical_crossentropy', metrics=['accuracy']) model.summary() checkpoint = ModelCheckpoint('classification1_10.h5', monitor='val_acc', verbose=1, save_best_only=True, mode='max') callbacks_list = [checkpoint] history = model.fit(x_data, y_data, validation_split=0.25, # epochs=50, batch_size=16, epochs=10, batch_size=16, verbose=1, callbacks=callbacks_list) with open('./classificationTrainHistoryDict1_10', 'wb') as file_pi: pickle.dump(history.history, file_pi)
# (These two statements are the tail of a plotting routine whose definition
# starts outside this view.)
plt.savefig("Loss.png")
plt.show()


if __name__ == '__main__':
    # Load data
    x_train, y_train = load_minst_data()

    # Variable definitions
    input_rows = 32  # Image size - rows
    input_cols = 32  # Image size - columns
    input_channels = 3  # Image channels
    latent_dim = 110  # Latent space size, chosen according to the number of images to generate during training
    epochs = 150000  # Training epochs
    batch_size = 128  # Number of images taken from x_train for each training cycle
    sample_interval = 1000  # Epoch interval at which models and images are saved
    input_classes = pd.Series(y_train).nunique()  # Number of distinct classes in y_train
    img_shape = (input_rows, input_cols, input_channels)  # Image dimensions
    optimizer = Adam(0.0002, 0.5)  # Optimizer (positional arguments: learning rate, beta_1)
    losses = ['binary_crossentropy', 'sparse_categorical_crossentropy']  # Losses

    print("x_train shape: {}".format(x_train.shape))
    print("y_train.shape:{}".format(y_train.shape))

    # Training
    g_loss, d_loss = train(input_classes, img_shape, input_channels, x_train, y_train, epochs, optimizer, losses, batch_size, sample_interval, latent_dim)

    # Loss evaluation
    plt.style.use('seaborn-white')
    plot_gan_losses(g_loss, d_loss)
trainX, testX, trainY, testY = train_test_split(processed_data, ohe, test_size=0.92, random_state=42) # initialize the model using a sigmoid activation as the final layer # in the network so we can perform multi-label classification print("[INFO] compiling model...") model = SmallVGGNet.build(width=IMAGE_DIMS[1], height=IMAGE_DIMS[0], depth=IMAGE_DIMS[2], classes=len(mlb.classes_), finalAct="sigmoid") # initialize the optimizer opt = Adam(lr=config.learn_rate) # compile the model using binary cross-entropy rather than # categorical cross-entropy -- this may seem counterintuitive for # multi-label classification, but keep in mind that the goal here # is to treat each output label as an independent Bernoulli # distribution model.compile(loss="binary_crossentropy", optimizer=opt, metrics=["accuracy"]) print("[INFO] summary of model...") print(model.summary()) #callbacks callbacks = [ WandbCallback(), EarlyStopping(patience=100, monitor='val_loss', verbose=1),
import os

from tensorflow.python import keras
from tensorflow.python.keras.optimizers import Adam

from seisnn.io import get_dir_list
from seisnn.tensorflow.generator import DataGenerator
from seisnn.tensorflow.model import Nest_Net, U_Net

# Pre-train the network on the pickled dataset and store the weights.
pkl_dir = "/mnt/tf_data/dataset/201718select_random"
pkl_list = get_dir_list(pkl_dir)

# NOTE(review): the first 5000 entries are used for training and the last
# 7304 for validation; the two slices overlap unless pkl_list holds at least
# 12304 entries -- confirm.
training_generator = DataGenerator(pkl_list[:5000], batch_size=2, shuffle=False)
validation_generator = DataGenerator(pkl_list[-7304:], batch_size=32)

tensorboard = keras.callbacks.TensorBoard(log_dir='../logs', histogram_freq=0,
                                          write_graph=True, write_images=False)

model = Nest_Net(1, 3001, 1)
# model = U_Net(1, 3001, 1)
model.compile(optimizer=Adam(lr=1e-4), loss='binary_crossentropy', metrics=['accuracy'])
model.fit_generator(generator=training_generator,
                    validation_data=validation_generator,
                    epochs=1,
                    use_multiprocessing=True,
                    callbacks=[tensorboard])

# Build the output path from weight_dir so the directory that is created and
# the directory that is written to cannot drift apart.
weight_dir = "/mnt/tf_data/weights"
os.makedirs(weight_dir, exist_ok=True)
model.save_weights(os.path.join(weight_dir, 'pretrained_weight.h5'))
use_fm=True, dnn_hidden_units=(128, 256), dnn_dropout=0)
# (The line above closes a model-constructor call that opens outside this view.)

# model = DeepFMmmoe(linear_feature_columns, dnn_feature_columns, embedding_size=8, use_fm=True, dnn_hidden_units=(128, 128,),
#                    l2_reg_linear=0.00001, l2_reg_embedding=0.00001, l2_reg_dnn=0, init_std=0.0001, seed=1024, dnn_dropout=0,
#                    dnn_activation='relu', dnn_use_bn=False, task='binary')
# try:
#     model = multi_gpu_model(model, gpus=2)
#     print("Training using multiple GPUs..")
# except Exception as e:
#     print(e)
#     print("Training using single GPU or CPU..")

# NOTE(review): two loss weights are given with a single loss name, which
# presumably means the model has two outputs sharing binary cross-entropy,
# weighted 0.6 / 0.4 -- confirm against the model definition.
model.compile(Adam(lr=0.0001), "binary_crossentropy",
              metrics=['binary_crossentropy', tf.keras.metrics.AUC()],
              loss_weights=[0.6, 0.4])
model.summary()
print(model.metrics_names)

# Checkpoint files are numbered by epoch: deepfmMMoetrain-0001.ckpt, ...
checkpoint_path = os.path.join(CHECKPOINT_ROOT_DIR, "deepfmMMoetrain", "deepfmMMoetrain-{epoch:04d}.ckpt")
checkpoint_dir = os.path.dirname(checkpoint_path)

# (The callback list and this call continue outside this view.)
callbacks = [
    tf.keras.callbacks.ModelCheckpoint(checkpoint_path,
def adr(frames, actions, states, context_frames, Ec, Eo, A, Do, Da,
        La=None, gaussian_a=False, use_seq_len=12, lstm_units=256,
        lstm_layers=1, learning_rate=0.001, random_window=True,
        reconstruct_random_frame=True):
    """Build and compile the residual (error-image) reconstruction model.

    An action-only reconstruction ``x_rec_a`` is produced with ``Ec``, ``A``
    and ``Da``. The target frames and that reconstruction are then encoded
    with ``Eo`` and decoded with ``Do`` into a positive and a negative error
    map whose difference is the model output. The loss is
    ``mean(rec_loss) + mean(rec_loss_pos) + mean(rec_loss_neg)``.

    Args:
        frames: Tensor whose static shape unpacks to
            ``(bs, seq_len, w, h, c)``.
        actions, states: Forwarded unchanged to ``get_ins``.
        context_frames: Number of leading frames used as context.
        Ec: Callable returning ``(encoding, skips)`` for the context frames.
        Eo: Callable returning a pair whose first element encodes the
            concatenated target / action-only reconstruction.
        A: Callable encoding the action/state sequence.
        Do: Decoder producing the error maps.
        Da: Decoder producing the action-only reconstruction.
        La: Callable whose second return value is used as the latent ``za``;
            required when ``gaussian_a`` is true.
        gaussian_a: If true, concatenate the latent from ``La``.
        use_seq_len: Number of frames used per sequence.
        lstm_units, lstm_layers: Forwarded to ``get_ins`` as ``a_units`` /
            ``a_layers``.
        learning_rate: Learning rate for the Adam optimizer.
        random_window: Forwarded to ``get_ins``.
        reconstruct_random_frame: If true, reconstruct a single randomly
            chosen frame instead of the whole window.

    Returns:
        A compiled Keras ``Model`` whose output is ``x_recovered``.

    Raises:
        AssertionError: If ``seq_len <= use_seq_len``.
        ValueError: If ``gaussian_a`` is true but ``La`` is missing.
    """
    bs, seq_len, w, h, c = [int(s) for s in frames.shape]
    assert seq_len > use_seq_len
    # Fail early with a clear message instead of a late
    # "'NoneType' object is not callable".
    if gaussian_a and La is None:
        raise ValueError("gaussian_a=True requires `La` to be provided")

    frame_inputs, action_state, initial_state, _, ins = get_ins(
        frames, actions, states,
        use_seq_len=use_seq_len, random_window=random_window,
        gaussian=gaussian_a, a_units=lstm_units, a_layers=lstm_layers)

    # context frames at the beginning
    xc_0 = tf.slice(frame_inputs, (0, 0, 0, 0, 0),
                    (-1, context_frames, -1, -1, -1))
    x_to_recover = frame_inputs
    n_frames = use_seq_len

    # ===== Build the model
    hc_0, skips_0 = Ec(xc_0)
    # Keep only the encoding of the last context frame.
    hc_0 = tf.slice(hc_0, (0, context_frames - 1, 0), (-1, 1, -1))
    skips = slice_skips(skips_0, start=context_frames - 1, length=1)

    if reconstruct_random_frame:
        a_s_dim = action_state.shape[-1]
        rand_index_1 = tf.random.uniform((), minval=0, maxval=use_seq_len,
                                         dtype='int32')
        action_state = tf.slice(action_state, (0, 0, 0),
                                (bs, rand_index_1 + 1, a_s_dim))
        # NOTE(review): this slices `frames`, whereas the sibling `adr_ao`
        # slices `frame_inputs`; if `get_ins` applies a random window the two
        # can differ -- confirm which one is intended.
        x_to_recover = tf.slice(frames, (0, rand_index_1, 0, 0, 0),
                                (bs, 1, w, h, c))
        n_frames = rand_index_1 + 1
    else:
        skips = repeat_skips(skips, use_seq_len)

    ha = A(action_state)
    hc_repeat = RepeatVector(n_frames)(tf.squeeze(hc_0, axis=1))
    hc_ha = K.concatenate([hc_repeat, ha], axis=-1)

    if gaussian_a:
        _, za, _, _ = La([hc_ha, initial_state])
        hc_ha = K.concatenate([hc_repeat, ha, za], axis=-1)

    if reconstruct_random_frame:
        # Only the last time step is decoded in this mode.
        _, hc_ha = tf.split(hc_ha, [-1, 1], axis=1)
        _, ha = tf.split(ha, [-1, 1], axis=1)
        hc_repeat = hc_0

    x_rec_a = Da([hc_ha, skips])

    # --> Changed the input to Eo from the error image to the full frame and
    #     the action only prediction
    x_rec_a_pos = K.relu(x_to_recover - x_rec_a)
    x_rec_a_neg = K.relu(x_rec_a - x_to_recover)

    # xo_rec_a = K.concatenate([x_rec_a_pos, x_rec_a_neg], axis=-1)
    xo_rec_a = K.concatenate([x_to_recover, x_rec_a], axis=-1)
    ho, _ = Eo(xo_rec_a)
    # ho = Eo(xo_rec_a)

    # Named `h_all` so the frame dimension `h` unpacked above is not rebound
    # to a tensor.
    h_all = K.concatenate([hc_repeat, ha, ho], axis=-1)  # multiple reconstruction

    x_err = Do([h_all, skips])

    # The first three channels of the decoder output are taken as the
    # positive error, the remaining ones as the negative error.
    x_err_pos = x_err[:, :, :, :, :3]
    x_err_neg = x_err[:, :, :, :, 3:]
    x_recovered = x_err_pos - x_err_neg
    x_target = x_to_recover - x_rec_a
    x_target_pos = x_rec_a_pos
    x_target_neg = x_rec_a_neg

    # == Autoencoder
    model = Model(inputs=ins, outputs=x_recovered)

    rec_loss = mean_squared_error(x_target, x_recovered)
    model.add_metric(K.mean(rec_loss), name='rec_loss', aggregation='mean')

    rec_loss_pos = mean_squared_error(x_target_pos, x_err_pos)
    model.add_metric(rec_loss_pos, name='rec_loss_pos', aggregation='mean')

    rec_loss_neg = mean_squared_error(x_target_neg, x_err_neg)
    model.add_metric(rec_loss_neg, name='rec_loss_neg', aggregation='mean')

    # Tracked as a metric only; it is not part of the optimized loss.
    rec_action_only_loss = mean_squared_error(x_rec_a, x_to_recover)
    model.add_metric(rec_action_only_loss, name='rec_A', aggregation='mean')

    model.add_loss(
        K.mean(rec_loss) + (K.mean(rec_loss_pos) + K.mean(rec_loss_neg)))

    # `learning_rate` is the supported keyword; `lr` is a deprecated alias.
    model.compile(optimizer=Adam(learning_rate=learning_rate))

    return model
def _main():
    """Train the YOLO model in three stages and save weights after each.

    Stage 0 trains ``last_layer_model`` on bottleneck features that are
    computed once and cached in ``bottlenecks.npz``. Stage 1 trains ``model``
    on generator data for epochs 0-50. Stage 2 marks every layer trainable
    and continues for epochs 50-100 with learning-rate reduction and early
    stopping. The annotation lines are shuffled with a fixed seed and the
    last 10% are used for validation.
    """
    annotation_path = 'train.txt'
    log_dir = 'logs/000/'
    classes_path = 'model_data/coco_classes.txt'
    anchors_path = 'model_data/yolo_anchors.txt'
    class_names = _get_classes(classes_path)
    num_classes = len(class_names)
    anchors = _get_anchors(anchors_path)

    input_shape = (416, 416)

    model, bottleneck_model, last_layer_model = _create(
        input_shape, anchors, num_classes,
        freeze_body=2, weights_path='model_path/yolo.h5')

    # Named `tensorboard_cb` so the stdlib module name `logging` is not
    # shadowed.
    tensorboard_cb = TensorBoard(log_dir=log_dir)
    checkpoint = ModelCheckpoint(
        log_dir + 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5',
        monitor='val_loss', save_weights_only=True, save_best_only=True,
        period=3)
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=3,
                                  verbose=1)
    early_stopping = EarlyStopping(monitor='val_loss', min_delta=0,
                                   patience=10, verbose=1)

    # The models output the loss value itself, so the Keras "loss" simply
    # passes the prediction through.
    yolo_loss = {'yolo_loss': lambda y_true, y_pred: y_pred}

    val_split = .1
    with open(annotation_path) as f:
        lines = f.readlines()
    # Fixed seed for a reproducible train/val split, then re-seed from the OS.
    np.random.seed(1)
    np.random.shuffle(lines)
    np.random.seed(None)
    num_val = int(len(lines) * val_split)
    num_train = len(lines) - num_val

    # ---- Train with frozen layers first (to stabilize the loss)

    # Compute the bottleneck features once and cache them on disk.
    if not os.path.isfile('bottlenecks.npz'):
        print('Calculating Bottlenecks: ')
        batch_size = 8
        bottlenecks = bottleneck_model.predict_generator(
            _data_generator_wrapper(lines, batch_size, input_shape, anchors,
                                    num_classes, random=False, verbose=True),
            steps=(len(lines) // batch_size) + 1, max_queue_size=1)
        np.savez('bottlenecks.npz', bot0=bottlenecks[0], bot1=bottlenecks[1],
                 bot2=bottlenecks[2])

    # Load bottleneck features from file. Each array is read once and the
    # archive is closed afterwards instead of leaving the handle open.
    with np.load('bottlenecks.npz') as dict_bot:
        bottleneck_arrays = [dict_bot[key] for key in ('bot0', 'bot1', 'bot2')]
    bottlenecks_train = [arr[:num_train] for arr in bottleneck_arrays]
    bottlenecks_val = [arr[num_train:] for arr in bottleneck_arrays]

    # Train last layers with fixed bottleneck features
    batch_size = 8
    print('Training last layers with bottleneck features with {} samples, val on {} samples and batch size {}'.format(
        num_train, num_val, batch_size))
    last_layer_model.compile(optimizer='adam', loss=yolo_loss)
    last_layer_model.fit_generator(
        _bottleneck_generator(lines[:num_train], batch_size, input_shape,
                              anchors, num_classes, bottlenecks_train),
        steps_per_epoch=max(1, num_train // batch_size),
        validation_data=_bottleneck_generator(
            lines[num_train:], batch_size, input_shape, anchors, num_classes,
            bottlenecks_val),
        validation_steps=max(1, num_val // batch_size),
        epochs=30,
        initial_epoch=0,
        max_queue_size=1)
    model.save_weights(log_dir + 'trained_weights_stage_0.h5')

    # Train last layers with random augmented data
    model.compile(optimizer=Adam(lr=1e-3), loss=yolo_loss)
    batch_size = 16
    print('Train on {} samples, val on {} samples, with batch size {}.'.format(
        num_train, num_val, batch_size))
    model.fit_generator(
        _data_generator_wrapper(lines[:num_train], batch_size, input_shape,
                                anchors, num_classes),
        steps_per_epoch=max(1, num_train // batch_size),
        validation_data=_data_generator_wrapper(
            lines[num_train:], batch_size, input_shape, anchors, num_classes),
        validation_steps=max(1, num_val // batch_size),
        epochs=50,
        initial_epoch=0,
        callbacks=[tensorboard_cb, checkpoint])
    model.save_weights(log_dir + 'trained_weights_stage_1.h5')

    # ---- Unfreeze and continue training, to fine tune
    for layer in model.layers:
        layer.trainable = True
    # Recompile so the trainable change takes effect.
    model.compile(optimizer=Adam(lr=1e-4), loss=yolo_loss)
    print('Unfreeze all of the layers.')

    batch_size = 4
    print('Train on {} samples, val on {} samples, with batch size {}.'.format(
        num_train, num_val, batch_size))
    model.fit_generator(
        _data_generator_wrapper(lines[:num_train], batch_size, input_shape,
                                anchors, num_classes),
        steps_per_epoch=max(1, num_train//batch_size),
        validation_data=_data_generator_wrapper(
            lines[num_train:], batch_size, input_shape, anchors, num_classes),
        validation_steps=max(1, num_val//batch_size),
        epochs=100,
        initial_epoch=50,
        callbacks=[tensorboard_cb, checkpoint, reduce_lr, early_stopping])
    model.save_weights(log_dir + 'trained_weights_final.h5')