import os
import json
import time

import numpy as np

from keras import backend as K
from keras.datasets import cifar10
from keras.optimizers import Adam
from keras.utils import np_utils

import densenet  # project-local DenseNet definition


def run_cifar10(batch_size, nb_epoch, depth, nb_dense_block, nb_filter,
                growth_rate, dropout_rate, learning_rate, weight_decay,
                plot_architecture):
    """ Run CIFAR10 experiments

    :param batch_size: int -- batch size
    :param nb_epoch: int -- number of training epochs
    :param depth: int -- network depth
    :param nb_dense_block: int -- number of dense blocks
    :param nb_filter: int -- initial number of conv filters
    :param growth_rate: int -- number of new filters added by conv layers
    :param dropout_rate: float -- dropout rate
    :param learning_rate: float -- learning rate
    :param weight_decay: float -- weight decay
    :param plot_architecture: bool -- whether to plot network architecture
    """

    ###################
    # Data processing #
    ###################

    # the data, shuffled and split between train and test sets
    (X_train, y_train), (X_test, y_test) = cifar10.load_data()

    nb_classes = len(np.unique(y_train))
    img_dim = X_train.shape[1:]

    if K.image_data_format() == "channels_first":
        n_channels = X_train.shape[1]
    else:
        n_channels = X_train.shape[-1]

    # convert class vectors to binary class matrices
    Y_train = np_utils.to_categorical(y_train, nb_classes)
    Y_test = np_utils.to_categorical(y_test, nb_classes)

    X_train = X_train.astype('float32')
    X_test = X_test.astype('float32')

    # Normalisation: per-channel standardisation using train+test statistics
    X = np.vstack((X_train, X_test))
    # 2 cases depending on the image data format
    if K.image_data_format() == "channels_first":
        for i in range(n_channels):
            mean = np.mean(X[:, i, :, :])
            std = np.std(X[:, i, :, :])
            X_train[:, i, :, :] = (X_train[:, i, :, :] - mean) / std
            X_test[:, i, :, :] = (X_test[:, i, :, :] - mean) / std
    elif K.image_data_format() == "channels_last":
        for i in range(n_channels):
            mean = np.mean(X[:, :, :, i])
            std = np.std(X[:, :, :, i])
            X_train[:, :, :, i] = (X_train[:, :, :, i] - mean) / std
            X_test[:, :, :, i] = (X_test[:, :, :, i] - mean) / std

    ###################
    # Construct model #
    ###################

    model = densenet.DenseNet(nb_classes,
                              img_dim,
                              depth,
                              nb_dense_block,
                              growth_rate,
                              nb_filter,
                              dropout_rate=dropout_rate,
                              weight_decay=weight_decay)
    # Model output
    model.summary()

    # Build optimizer
    opt = Adam(lr=learning_rate, beta_1=0.9, beta_2=0.999, epsilon=1e-08)

    model.compile(loss='categorical_crossentropy',
                  optimizer=opt,
                  metrics=["accuracy"])

    if plot_architecture:
        # Keras 2 renamed visualize_util.plot to utils.plot_model
        from keras.utils import plot_model
        plot_model(model, to_file='./figures/densenet_archi.png',
                   show_shapes=True)

    ####################
    # Network training #
    ####################

    print("Training")

    list_train_loss = []
    list_test_loss = []
    list_learning_rate = []

    for e in range(nb_epoch):

        # step-wise schedule: divide the LR by 10 at 50% and by 100 at 75%
        if e == int(0.5 * nb_epoch):
            K.set_value(model.optimizer.lr, np.float32(learning_rate / 10.))

        if e == int(0.75 * nb_epoch):
            K.set_value(model.optimizer.lr, np.float32(learning_rate / 100.))

        split_size = batch_size
        # integer division so num_splits is an int
        num_splits = X_train.shape[0] // split_size
        arr_splits = np.array_split(np.arange(X_train.shape[0]), num_splits)

        l_train_loss = []
        start = time.time()

        for batch_idx in arr_splits:
            X_batch, Y_batch = X_train[batch_idx], Y_train[batch_idx]
            train_logloss, train_acc = model.train_on_batch(X_batch, Y_batch)
            l_train_loss.append([train_logloss, train_acc])

        test_logloss, test_acc = model.evaluate(X_test,
                                                Y_test,
                                                verbose=0,
                                                batch_size=64)
        list_train_loss.append(np.mean(np.array(l_train_loss), 0).tolist())
        list_test_loss.append([test_logloss, test_acc])
        # cast to float so the value is json serializable
        list_learning_rate.append(float(K.get_value(model.optimizer.lr)))
        print('Epoch %s/%s, Time: %s' % (e + 1, nb_epoch, time.time() - start))

        d_log = {}
        d_log["batch_size"] = batch_size
        d_log["nb_epoch"] = nb_epoch
        d_log["optimizer"] = opt.get_config()
        d_log["train_loss"] = list_train_loss
        d_log["test_loss"] = list_test_loss
        d_log["learning_rate"] = list_learning_rate

        json_file = os.path.join('./log/experiment_log_cifar10.json')
        with open(json_file, 'w') as fp:
            json.dump(d_log, fp, indent=4, sort_keys=True)
class GAN(object):
    """ Generative Adversarial Network class """

    def __init__(self,
                 latent_dim=100,
                 length=16,
                 width=120,
                 height=160,
                 channels=3,
                 c3d_weights=None):
        self.image_path = '/media/lq/C13E-1ED0/dataset/UCF_Crimes/Imgs/RoadAccidents/'
        self.n_classes = 2
        self.latent_dim = latent_dim
        self.length = length
        self.width = width
        self.height = height
        self.channels = channels
        self.c3d_weights = c3d_weights
        self.shape = (self.width, self.height, self.channels)
        self.disc_optimizer = Adam(lr=1e-6, decay=0.00005)
        self.gen_optimizer = Adam(lr=0.0006, decay=0.00005)

        # init the C3D model
        self.c3d_model = c3d_model.get_model()
        if self.c3d_weights is None:
            raise Exception('weights are required!')
        try:
            self.c3d_model.load_weights(self.c3d_weights)
        except OSError:
            print("the pretrained weights don't exist, please use <-h> to check usage")
            exit()
        convLayers = [
            'conv1', 'conv2', 'conv3a', 'conv3b', 'conv4a', 'conv4b',
            'conv5a', 'conv5b'
        ]
        for layer in convLayers:
            self.c3d_model.get_layer(layer).trainable = False
        self.add_outputs(1)

        # fixed C3D (conv1 - pool5), extracting real features
        self.fixed_c3d = Model(
            inputs=self.c3d_model.input,
            outputs=self.c3d_model.get_layer('flatten_1').output)

        # discriminator
        self.D = self.__discriminator()
        self.D.compile(loss='categorical_crossentropy',
                       optimizer=self.disc_optimizer,
                       metrics=['accuracy'])

        # generator
        self.G = self.__generator()
        # self.G.compile(loss='', optimizer=self.optimizer)

        self.GAN = self.__stacked_generator_discriminator()
        self.GAN.compile(loss=self.loss_matching,
                         optimizer=self.gen_optimizer)

        self.c3d_model.summary()
        self.fixed_c3d.summary()
        self.G.summary()
        self.D.summary()
        self.GAN.summary()

    def loss_matching(self, y_true, y_pred):
        # loss = K.mean(K.abs(y_pred))
        loss = tf.nn.l2_loss(y_pred)
        return loss

    def __generator(self):
        """ Declare generator """
        # FC1
        generator_input = Input(shape=(self.latent_dim, ))
        x = Dense(8192, name='fc1')(generator_input)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
        # FC2
        x = Dense(10240, name='fc2')(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
        model_gen = Model(generator_input, x)
        return model_gen

    def __discriminator(self):
        """ Declare discriminator """
        FC6 = self.c3d_model.get_layer('fc6')
        FC7 = self.c3d_model.get_layer('fc7')
        FC8 = self.c3d_model.get_layer('fc8')
        discriminator_input = Input(shape=(10240, ))
        x = FC6(discriminator_input)
        x = LeakyReLU()(x)
        x = FC7(x)
        x = LeakyReLU()(x)
        x = Dropout(0.4)(x)
        x = FC8(x)
        model_disc = Model(discriminator_input, x)
        return model_disc

    def __stacked_generator_discriminator(self):
        # output from generator
        gan_input_fake = Input(shape=(self.latent_dim, ))
        fake_feature = self.G(gan_input_fake)
        # output from ConvNets (pool5)
        real_feature = Input((10240, ))

        # FC6 and FC7 layers
        intermediate_layer_model = Model(
            inputs=self.D.input,
            outputs=self.D.get_layer('fc7').get_output_at(1))
        # set the discriminator weights to non-trainable
        intermediate_layer_model.trainable = False

        out_fake = intermediate_layer_model(fake_feature)
        out_real = intermediate_layer_model(real_feature)
        # feature matching: difference of batch-mean fc7 activations
        matching = Lambda(
            lambda x: K.mean(x[0], axis=0) - K.mean(x[1], axis=0),
            name='loss_matching')([out_fake, out_real])
        model_gan = Model(inputs=[gan_input_fake, real_feature],
                          outputs=[matching])
        return model_gan

    def train(self, iterations=5000, batch_size=16, save_it=[2500], id=0):
        # load the real data
        train_AS_windows, train_A_windows, train_BG_windows = load_train_data()

        # real image generator for the discriminator (AS + non-AS)
        disc_batch_generator = batch_generator_AS_A_BG_2_1_1(
            AS_windows=train_AS_windows,
            A_windows=train_A_windows,
            BG_windows=train_BG_windows,
            windows_length=self.length,
            batch_size=batch_size,
            N_iterations=iterations,
            N_classes=self.n_classes + 1,
            img_path=self.image_path)
        # real image generator for the generator (AS only)
        gen_batch_generator = batch_generator_AS(
            AS_windows=train_AS_windows,
            windows_length=self.length,
            batch_size=batch_size,
            N_iterations=iterations,
            N_classes=self.n_classes + 1,
            img_path=self.image_path)

        # logs
        result_dir = '/media/lq/C13E-1ED0/dataset/UCF_Crimes/results/gan_{}/'.format(id)
        weight_dir = os.path.join(result_dir, 'weights')
        if not os.path.isdir(weight_dir):
            os.makedirs(weight_dir)  # also creates result_dir
        log_dir = os.path.join(result_dir, 'logs')
        desp = os.path.join(result_dir, 'desp.txt')
        with open(desp, 'w') as f:
            f.write("c3d weights: {}\n".format(self.c3d_weights))
            f.write("disc_optimizer: {}\n".format(
                self.disc_optimizer.get_config()))
            f.write("gen_optimizer: {}\n".format(
                self.gen_optimizer.get_config()))
        callback = callbacks.TensorBoard(log_dir=log_dir,
                                         batch_size=batch_size,
                                         histogram_freq=0,
                                         write_graph=True,
                                         write_images=True)
        callback.set_model(self.GAN)
        # loss_names = ['disc_train_loss_real', 'disc_train_acc_real', 'disc_train_loss_fake', 'disc_train_acc_fake', 'gen_train_loss']
        loss_names = ['disc_train_loss', 'disc_train_acc', 'gen_train_loss']

        for cnt in tqdm(range(iterations)):
            '''discriminator'''
            # Sample random points in the latent space
            random_latent_vectors = np.random.standard_normal(
                size=(batch_size // 2, self.latent_dim))
            # Decode them to fake features
            generated_features = self.G.predict(random_latent_vectors)
            # real images
            real_images, real_labels = next(disc_batch_generator)
            real_features = self.fixed_c3d.predict(real_images)
            # the extra class index (self.n_classes) marks fake samples
            fake_labels = np.ones(batch_size // 2) * (self.n_classes)
            fake_labels = np_utils.to_categorical(fake_labels,
                                                  self.n_classes + 1)
            combined_features = np.concatenate(
                [generated_features, real_features])
            labels = np.concatenate([fake_labels, real_labels])
            # Add random noise to the labels - important trick!
            # labels += 0.05 * np.random.random(labels.shape)
            d_loss, d_acc = self.D.train_on_batch(combined_features, labels)
            # d_loss_real, d_acc_real = self.D.train_on_batch(real_features, real_labels)
            # d_loss_fake, d_acc_fake = self.D.train_on_batch(generated_features, fake_labels)

            '''generator (via the gan model, where the discriminator weights are frozen)'''
            random_latent_vectors = np.random.standard_normal(
                size=(batch_size, self.latent_dim))
            real_AS_images, real_AS_labels = next(gen_batch_generator)
            real_AS_features = self.fixed_c3d.predict(real_AS_images)
            g_loss = self.GAN.train_on_batch(
                [random_latent_vectors, real_AS_features],
                [real_AS_labels])  # the labels are not used

            # tensorboard log
            # logs = [d_loss_real, d_acc_real, d_loss_fake, d_acc_fake, g_loss]
            logs = [d_loss, d_acc, g_loss]
            write_log(callback, loss_names, logs, cnt)
            if cnt in save_it:
                self.save_weights(weight_dir, cnt)
            tqdm.write(
                'iteration: {}, [Discriminator :: d_loss: {}, d_acc: {}], [ Generator :: loss: {}]'
                .format(cnt, d_loss, d_acc, g_loss))
        self.save_weights(weight_dir, iterations)
        print('done')

    def save_weights(self, weight_dir, iteration):
        # save weights
        out_22 = 'c3d_TC_GAN_22_outputs_it{}.hdf5'.format(iteration)
        self.c3d_model.save_weights(os.path.join(weight_dir, out_22))
        # self.GAN.save_weights(os.path.join(weight_dir, 'GAN.hdf5'))
        # remove the last node from the output layer
        # self.remove_last_output()
        # out_21 = 'c3d_TC_GAN_21_outputs_it{}.hdf5'.format(iteration)
        # self.c3d_model.save_weights(os.path.join(weight_dir, out_21))

    def add_outputs(self, n_new_outputs):
        # Increment the number of outputs
        new_n_classes = self.n_classes + n_new_outputs
        weights = self.c3d_model.get_layer('fc8').get_weights()
        # Adding new weights: zero biases, small random kernel columns
        shape = weights[0].shape[0]
        weights[1] = np.concatenate((weights[1], np.zeros(n_new_outputs)),
                                    axis=0)
        weights[0] = np.concatenate(
            (weights[0],
             -0.0001 * np.random.random_sample((shape, n_new_outputs)) + 0.0001),
            axis=1)
        # Deleting the old output layer
        self.c3d_model.layers.pop()
        last_layer = self.c3d_model.get_layer('dropout_2').output
        # New output layer
        out = Dense(new_n_classes, activation='softmax', name='fc8')(last_layer)
        self.c3d_model = Model(inputs=self.c3d_model.input, outputs=out)
        # set weights to the layer
        self.c3d_model.get_layer('fc8').set_weights(weights)
        # print(weights[0])

    def remove_last_output(self):
        w = self.c3d_model.get_layer('fc8').get_weights()
        w[0] = np.delete(w[0], np.s_[-1], axis=1)
        w[1] = np.delete(w[1], np.s_[-1])
        # Deleting the old output layer
        self.c3d_model.layers.pop()
        last_layer = self.c3d_model.get_layer('dropout_2').output
        # New output layer
        out = Dense(self.n_classes, activation='softmax', name='fc8')(last_layer)
        self.c3d_model = Model(inputs=self.c3d_model.input, outputs=out)
        self.c3d_model.get_layer('fc8').set_weights(w)
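# --- Usage sketch (not part of the original script) ---
# A minimal, hypothetical driver for the GAN class above. The weights path
# and training settings are placeholders, and the project-local modules
# (c3d_model, the batch generators, write_log) are assumed importable.
if __name__ == '__main__':
    gan = GAN(latent_dim=100,
              length=16,
              c3d_weights='path/to/pretrained_c3d.hdf5')  # placeholder path
    gan.train(iterations=5000, batch_size=16, save_it=[2500], id=0)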
def run_MURA(batch_size, nb_epoch, depth, nb_dense_block, nb_filter,
             growth_rate, dropout_rate, learning_rate, weight_decay,
             plot_architecture):
    """ Run MURA experiments

    :param batch_size: int -- batch size
    :param nb_epoch: int -- number of training epochs
    :param depth: int -- network depth
    :param nb_dense_block: int -- number of dense blocks
    :param nb_filter: int -- initial number of conv filters
    :param growth_rate: int -- number of new filters added by conv layers
    :param dropout_rate: float -- dropout rate
    :param learning_rate: float -- learning rate
    :param weight_decay: float -- weight decay
    :param plot_architecture: bool -- whether to plot network architecture
    """

    ###################
    # Data processing #
    ###################

    # /home/yu/Documents/tensorflow/MURA/MURA-v1.1 is the path of the MURA dataset
    im_size = 320
    # parameters to tweak when experimenting: size, root_path, nb_epoch, nb_dense_block
    X_train_path, Y_train = data_loader.load_path(root_path='../train',
                                                  size=im_size)
    X_valid_path, Y_valid = data_loader.load_path(root_path='../valid',
                                                  size=im_size)
    # load the validation set up front
    X_valid = data_loader.load_image(X_valid_path, im_size)
    Y_valid = np.asarray(Y_valid)

    nb_classes = 1
    img_dim = (im_size, im_size, 1)  # include the channel dimension; img_dim is a tuple

    ###################
    # Construct model #
    ###################

    model = densenet.DenseNet(nb_classes,
                              img_dim,
                              depth,
                              nb_dense_block,
                              growth_rate,
                              nb_filter,
                              dropout_rate=dropout_rate,
                              weight_decay=weight_decay)
    # Model output
    model.summary()

    # Build optimizer
    opt = Adam(lr=learning_rate, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
    model.compile(loss='binary_crossentropy',
                  optimizer=opt,
                  metrics=["accuracy"])

    if plot_architecture:
        from keras.utils import plot_model
        plot_model(model, to_file='./figures/densenet_archi.png',
                   show_shapes=True)

    ####################
    # Network training #
    ####################

    print("Start Training")

    list_train_loss = []
    list_valid_loss = []
    list_learning_rate = []
    # best so far: [valid loss, valid acc, train/valid loss gap, train/valid acc gap]
    best_record = [100, 0, 100, 100]
    start_time = datetime.datetime.now()

    for e in range(nb_epoch):
        # step-wise learning-rate schedule
        if e == int(0.25 * nb_epoch):
            K.set_value(model.optimizer.lr, np.float32(learning_rate / 10.))
        if e == int(0.5 * nb_epoch):
            K.set_value(model.optimizer.lr, np.float32(learning_rate / 50.))
        if e == int(0.75 * nb_epoch):
            K.set_value(model.optimizer.lr, np.float32(learning_rate / 100.))

        split_size = batch_size
        # integer division so num_splits is an int
        num_splits = len(X_train_path) // split_size
        arr_all = np.arange(len(X_train_path)).astype(int)
        random.shuffle(arr_all)  # shuffle the index order
        arr_splits = np.array_split(arr_all, num_splits)

        l_train_loss = []
        batch_train_loss = []
        start = datetime.datetime.now()

        for i, batch_idx in enumerate(arr_splits):
            X_batch_path, Y_batch = [], []
            for idx in batch_idx:
                X_batch_path.append(X_train_path[idx])
                Y_batch.append(Y_train[idx])
            X_batch = data_loader.load_image(Path=X_batch_path, size=im_size)
            Y_batch = np.asarray(Y_batch)
            train_logloss, train_acc = model.train_on_batch(X_batch, Y_batch)
            l_train_loss.append([train_logloss, train_acc])
            batch_train_loss.append([train_logloss, train_acc])
            if i % 100 == 0:
                loss_1, acc_1 = np.mean(np.array(l_train_loss), 0)
                loss_2, acc_2 = np.mean(np.array(batch_train_loss), 0)
                # reset: loss and accuracy over the last 100 batches
                batch_train_loss = []
                print(
                    '[Epoch {}/{}] [Batch {}/{}] [Time: {}] [all_batchs--> train_epoch_logloss: {:.5f}, train_epoch_acc:{:.5f}] '
                    .format(e + 1, nb_epoch, i, len(arr_splits),
                            datetime.datetime.now() - start, loss_1, acc_1),
                    '[this_100_batchs-->train_batchs_logloss: {:.5f}, train_batchs_acc:{:.5f}]'
                    .format(loss_2, acc_2))

        # evaluate on the validation set
        valid_logloss, valid_acc = model.evaluate(X_valid,
                                                  Y_valid,
                                                  verbose=0,
                                                  batch_size=64)
        list_train_loss.append(np.mean(np.array(l_train_loss), 0).tolist())
        list_valid_loss.append([valid_logloss, valid_acc])
        # cast to float so the value is json serializable
        list_learning_rate.append(float(K.get_value(model.optimizer.lr)))
        print('[Epoch %s/%s] [Time: %s, Total_time: %s]' %
              (e + 1, nb_epoch, datetime.datetime.now() - start,
               datetime.datetime.now() - start_time),
              end='')
        print(
            '[train_loss_and_acc:{:.5f} {:.5f}] [valid_loss_acc:{:.5f} {:.5f}]'
            .format(list_train_loss[-1][0], list_train_loss[-1][1],
                    list_valid_loss[-1][0], list_valid_loss[-1][1]))

        d_log = {}
        d_log["batch_size"] = batch_size
        d_log["nb_epoch"] = nb_epoch
        d_log["optimizer"] = opt.get_config()
        d_log["train_loss"] = list_train_loss
        d_log["valid_loss"] = list_valid_loss
        d_log["learning_rate"] = list_learning_rate

        json_file = os.path.join('./log/experiment_log_MURA.json')
        with open(json_file, 'w') as fp:
            json.dump(d_log, fp, indent=4, sort_keys=True)

        record = [
            valid_logloss,
            valid_acc,
            abs(valid_logloss - list_train_loss[-1][0]),
            abs(valid_acc - list_train_loss[-1][1]),
        ]
        if record[0] <= best_record[0] and record[1] >= best_record[1]:
            # after a quarter of the epochs, also require the train/valid gaps to shrink
            if e <= int(0.25 * nb_epoch) or (record[2] <= best_record[2]
                                             and record[3] <= best_record[3]):
                best_record = record
                print('saving the best model:epoch', e + 1, best_record)
                model.save('save_models/best_MURA_modle@epochs{}.h5'.format(e + 1))
        model.save('save_models/MURA_modle@epochs{}.h5'.format(e + 1))
        # train disc on fake
        df_loss = disc.train_on_batch([x[i], re_shape(fake)], fake_y)

        # train combined (discriminator frozen for the generator update)
        disc.trainable = False
        g_loss = combined.train_on_batch(x[i], [
            np.reshape(y[i], (1, sequence_length * size * size, output)),
            real_y
        ])
        disc.trainable = True

        log.write(
            str(e) + ", " + str(s) + ", " + str(dr_loss) + ", " +
            str(df_loss) + ", " + str(g_loss[0]) + ", " + str(g_loss[1]) +
            ", " + str(opt_dcgan.get_config()["lr"]) + "\n")
        progbar.add(1)

    # validation
    sequence = validation[random.randrange(0, len(validation))]
    x, y = load(sequence, sequence_length)
    for i in range(len(x)):
        random_index = random.randrange(0, len(x))
        generated_y = gen.predict(x[random_index])
        save_image(x[random_index] / 2 + 0.5, y[random_index],
                   re_shape(generated_y),
                   validation_dir + "e_{}.png".format(e))

    # save weights
def run_MURA(batch_size, nb_epoch, depth, nb_dense_block, nb_filter,
             growth_rate, dropout_rate, learning_rate, weight_decay,
             plot_architecture):
    """ Run MURA experiments

    :param batch_size: int -- batch size
    :param nb_epoch: int -- number of training epochs
    :param depth: int -- network depth
    :param nb_dense_block: int -- number of dense blocks
    :param nb_filter: int -- initial number of conv filters
    :param growth_rate: int -- number of new filters added by conv layers
    :param dropout_rate: float -- dropout rate
    :param learning_rate: float -- learning rate
    :param weight_decay: float -- weight decay
    :param plot_architecture: bool -- whether to plot network architecture
    """

    ###################
    # Data processing #
    ###################

    im_size = 320
    # parameters to tweak when experimenting: size, root_path, nb_epoch, nb_dense_block
    X_train_path, Y_train = data_loader.load_path(
        root_path='./train/XR_HUMERUS', size=im_size)
    X_valid_path, Y_valid = data_loader.load_path(
        root_path='./valid/XR_HUMERUS', size=im_size)
    # load the validation set up front
    X_valid = data_loader.load_image(X_valid_path, im_size)
    Y_valid = np.asarray(Y_valid)

    nb_classes = 1
    img_dim = (im_size, im_size, 1)  # include the channel dimension; img_dim is a tuple

    ###################
    # Construct model #
    ###################

    model = densenet.DenseNet(nb_classes,
                              img_dim,
                              depth,
                              nb_dense_block,
                              growth_rate,
                              nb_filter,
                              dropout_rate=dropout_rate,
                              weight_decay=weight_decay)
    # Model output
    model.summary()

    # Build optimizer
    opt = Adam(lr=learning_rate, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
    model.compile(loss='binary_crossentropy',
                  optimizer=opt,
                  metrics=["accuracy"])

    if plot_architecture:
        from keras.utils import plot_model
        plot_model(model, to_file='./figures/densenet_archi.png',
                   show_shapes=True)

    ####################
    # Network training #
    ####################

    print("Start Training")

    list_train_loss = []
    list_valid_loss = []
    list_learning_rate = []
    # best so far: [valid loss, valid acc, train/valid loss gap, train/valid acc gap]
    best_record = [100, 0, 100, 100]
    start_time = datetime.datetime.now()

    for e in range(nb_epoch):
        # step-wise learning-rate schedule
        if e == int(0.25 * nb_epoch):
            K.set_value(model.optimizer.lr, np.float32(learning_rate / 10.))
        if e == int(0.5 * nb_epoch):
            K.set_value(model.optimizer.lr, np.float32(learning_rate / 50.))
        if e == int(0.75 * nb_epoch):
            K.set_value(model.optimizer.lr, np.float32(learning_rate / 100.))

        split_size = batch_size
        # integer division so num_splits is an int
        num_splits = len(X_train_path) // split_size
        arr_all = np.arange(len(X_train_path)).astype(int)
        random.shuffle(arr_all)  # randomly shuffle the index order
        arr_splits = np.array_split(arr_all, num_splits)

        l_train_loss = []
        batch_train_loss = []
        start = datetime.datetime.now()

        for i, batch_idx in enumerate(arr_splits):
            X_batch_path, Y_batch = [], []
            for idx in batch_idx:
                X_batch_path.append(X_train_path[idx])
                Y_batch.append(Y_train[idx])
            X_batch = data_loader.load_image(Path=X_batch_path, size=im_size)
            Y_batch = np.asarray(Y_batch)
            train_logloss, train_acc = model.train_on_batch(X_batch, Y_batch)
            l_train_loss.append([train_logloss, train_acc])
            batch_train_loss.append([train_logloss, train_acc])
            if i % 100 == 0:
                loss_1, acc_1 = np.mean(np.array(l_train_loss), 0)
                loss_2, acc_2 = np.mean(np.array(batch_train_loss), 0)
                # reset: loss and accuracy over the current 100 batches
                batch_train_loss = []
                print(
                    '[Epoch {}/{}] [Batch {}/{}] [Time: {}] [all_batchs--> train_epoch_logloss: {:.5f}, train_epoch_acc:{:.5f}] '
                    .format(e + 1, nb_epoch, i, len(arr_splits),
                            datetime.datetime.now() - start, loss_1, acc_1),
                    '[this_100_batchs-->train_batchs_logloss: {:.5f}, train_batchs_acc:{:.5f}]'
                    .format(loss_2, acc_2))

        # run the validation set
        valid_logloss, valid_acc = model.evaluate(X_valid,
                                                  Y_valid,
                                                  verbose=0,
                                                  batch_size=64)
        list_train_loss.append(np.mean(np.array(l_train_loss), 0).tolist())
        list_valid_loss.append([valid_logloss, valid_acc])
        # cast to float so the value is json serializable
        list_learning_rate.append(float(K.get_value(model.optimizer.lr)))
        print('[Epoch %s/%s] [Time: %s, Total_time: %s]' %
              (e + 1, nb_epoch, datetime.datetime.now() - start,
               datetime.datetime.now() - start_time),
              end='')
        print(
            '[train_loss_and_acc:{:.5f} {:.5f}] [valid_loss_acc:{:.5f} {:.5f}]'
            .format(list_train_loss[-1][0], list_train_loss[-1][1],
                    list_valid_loss[-1][0], list_valid_loss[-1][1]))

        d_log = {}
        d_log["batch_size"] = batch_size
        d_log["nb_epoch"] = nb_epoch
        d_log["optimizer"] = opt.get_config()
        d_log["train_loss"] = list_train_loss
        d_log["valid_loss"] = list_valid_loss
        d_log["learning_rate"] = list_learning_rate

        json_file = os.path.join('./log/experiment_log_MURA.json')
        with open(json_file, 'w') as fp:
            json.dump(d_log, fp, indent=4, sort_keys=True)

        record = [
            valid_logloss,
            valid_acc,
            abs(valid_logloss - list_train_loss[-1][0]),
            abs(valid_acc - list_train_loss[-1][1]),
        ]
        if record[0] <= best_record[0] and record[1] >= best_record[1]:
            # add the gap check after a quarter of the epochs
            if e <= int(0.25 * nb_epoch) or (record[2] <= best_record[2]
                                             and record[3] <= best_record[3]):
                # record the smallest [valid loss, valid acc, loss gap, acc gap]
                best_record = record
                print('saving the best model:epoch', e + 1, best_record)
                model.save('save_models/best_MURA_modle@epochs{}.h5'.format(e + 1))
        model.save('save_models/MURA_modle@epochs{}.h5'.format(e + 1))
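# --- Usage sketch (not part of the original script) ---
# This variant takes every hyperparameter explicitly. The values below are
# illustrative (they mirror the keyword-argument variant later in this
# collection), not the authors' configuration.
if __name__ == '__main__':
    run_MURA(batch_size=8, nb_epoch=12, depth=22, nb_dense_block=4,
             nb_filter=16, growth_rate=12, dropout_rate=0.2,
             learning_rate=0.001, weight_decay=1E-4,
             plot_architecture=False)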
                                       theta=theta, regimes=regimes)

    print("-- Training episodes =", train_episodes)
    print("-- Evaluation episodes =", eval_episodes)
    print("-- Batch size =", batch_size)
    print("-- Initial replay memory size =", init_d_size)
    print("-- Maximum replay memory size =", max_d_size)
    print("-- Update target network after", target_update, "episodes.")
    print("-- Dimension of state representation =", dim_state)
    print("-- Number of actions (different possible portfolio allocations) =",
          dim_actions)
    print("-- Number of hidden layers =", len(hidden_dims))
    print("-- Nodes in each hidden layer = ", str(hidden_dims)[1:-1])
    print("-- Optimizer configuration:")
    print(pd.DataFrame.from_dict(optimizer.get_config(), orient="index"))
    print("-- Discount factor for TD-target = ", gamma)
    print("-- Epsilon starting value (for exploration) =", epsilon)
    print("-- Epsilon decay factor =", epsilon_decay)
    print("-- Transaction cost factor =", tcost)
    print("-- Investor's investment horizon =", horizon)
    print("-- Investor's initial wealth =", w)
    print("-- Investor's risk aversion factor =", theta)
    print("-- Asset log-return distribution parameters in different regimes:")
    print(pd.DataFrame.from_dict(regimes))
    print("-- Evaluation state space (from, to, steps):", eval_w_start,
          eval_w_end, eval_w_points)

    reg_periods = [v["periods"] for v in regimes.values()]
    reg_lengths = [len(r) for r in reg_periods]
def model(trainX, trainY, valX, valY):
    ''' Model providing function '''
    model_callbacks = []
    img_rows = 64
    img_cols = 80
    smooth = 1.
    batch_size = 16

    # passing argument 'test' trains the model for only 1 epoch;
    # passing argument 'epochN' (with N a positive int) trains for N epochs
    nb_epoch = 300
    try:
        nb_epoch = find_argument("epoch")
    except ValueError:
        pass
    try:
        find_argument("test")
        nb_epoch = 1
    except ValueError:
        pass

    act = 'relu'
    base_layer_depth = 32
    lmbda = 0.1
    l2reg = l2(lmbda)
    dropout = 0.5
    opt = Adam()  # Adadelta()

    ## transforming optimizer and parameters to string
    optstr = str(opt.__class__).split(".")[2][:-2]
    lr = opt.get_config()
    lr = lr['lr']
    optstr = optstr + '_lr-{0:.6g}'.format(lr)

    pixel_offset = 2
    ### pixel_offset is converted into a fraction of the image's pixel size
    ### (float() guards against Python 2 integer division)
    pixel_offset_w = float(pixel_offset) / img_cols
    pixel_offset_h = float(pixel_offset) / img_rows

    print "inputsize: " + str(img_rows) + ' ' + str(img_cols)
    print "opt: " + str(optstr)
    print "dropout: " + str(dropout)
    print "batch_size: " + str(batch_size)
    print "lambda l2 : " + str(lmbda)
    print "pixel_offset : " + str(pixel_offset)

    ################### callbacks ###################
    modelDir = 'models/logs_D-{0:.3f}'.format(dropout) + '_o-' + optstr + \
        '_lmd-' + str(lmbda) + '_px-' + str(pixel_offset)
    mkdir(modelDir)
    early = EarlyStopping(monitor='val_loss',
                          patience=150,
                          verbose=1,
                          mode='auto')
    # Callback to save the best epoch and, eventually, overwrite it if
    # outperformed (regarding the same model)
    checkpoint_name = modelDir + '/best_model.h5'  # .{epoch:02d}-{val_loss:.4f}.h5'
    checkpoint = ModelCheckpoint(checkpoint_name,
                                 monitor='val_loss',
                                 verbose=1,
                                 save_best_only=True,
                                 save_weights_only=False,
                                 mode='auto')
    # Tensorboard for each result
    # tb_callback = TensorBoard(log_dir="./" + modelDir, histogram_freq=0, write_graph=True)
    # WeightsGIF and ActivationsGIF
    weigthsSave = WeightsGIF(modelDir, 1)
    fileSave = FileMonitor(modelDir)
    # activationsSave = ActivationsGIF(modelDir, 1, trainX[0])

    # model_callbacks.append(tb_callback)
    model_callbacks.append(checkpoint)
    model_callbacks.append(early)
    model_callbacks.append(weigthsSave)
    model_callbacks.append(fileSave)
    # model_callbacks.append(activationsSave)

    ################### Model and Layers definition ###################
    image_input = Input((img_rows, img_cols, 3), name="images")

    conv1 = Convolution2D(base_layer_depth, 5, 5, activation='relu',
                          border_mode='same', W_regularizer=l2reg,
                          b_regularizer=l2reg)(image_input)
    conv1 = core.Dropout(dropout)(conv1)
    conv1 = Convolution2D(base_layer_depth, 5, 5, activation='relu',
                          border_mode='same', W_regularizer=l2reg,
                          b_regularizer=l2reg)(conv1)
    conv1 = core.Dropout(dropout)(conv1)
    pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)

    conv2 = Convolution2D(base_layer_depth * 2, 3, 3, activation='relu',
                          border_mode='same', W_regularizer=l2reg,
                          b_regularizer=l2reg)(pool1)
    conv2 = core.Dropout(dropout)(conv2)
    conv2 = Convolution2D(base_layer_depth * 2, 3, 3, activation='relu',
                          border_mode='same', W_regularizer=l2reg,
                          b_regularizer=l2reg)(conv2)
    conv2 = core.Dropout(dropout)(conv2)
    pool2 = MaxPooling2D(pool_size=(2, 2))(conv2)

    conv3 = Convolution2D(base_layer_depth * 4, 3, 3, activation='relu',
                          border_mode='same', W_regularizer=l2reg,
                          b_regularizer=l2reg)(pool2)
    conv3 = core.Dropout(dropout)(conv3)
    conv3 = Convolution2D(base_layer_depth * 4, 3, 3, activation='relu',
                          border_mode='same', W_regularizer=l2reg,
                          b_regularizer=l2reg)(conv3)
    conv3 = core.Dropout(dropout)(conv3)
    pool3 = MaxPooling2D(pool_size=(2, 2))(conv3)

    conv4 = Convolution2D(base_layer_depth * 8, 3, 3, activation='relu',
                          border_mode='same', W_regularizer=l2reg,
                          b_regularizer=l2reg)(pool3)
    conv4 = core.Dropout(dropout)(conv4)
    conv4 = Convolution2D(base_layer_depth * 8, 3, 3, activation='relu',
                          border_mode='same', W_regularizer=l2reg,
                          b_regularizer=l2reg)(conv4)
    conv4 = core.Dropout(dropout)(conv4)
    pool4 = MaxPooling2D(pool_size=(2, 2))(conv4)

    conv5 = Convolution2D(base_layer_depth * 16, 3, 3, activation='relu',
                          border_mode='same', W_regularizer=l2reg,
                          b_regularizer=l2reg)(pool4)
    conv5 = core.Dropout(dropout)(conv5)
    conv5 = Convolution2D(base_layer_depth * 16, 3, 3, activation='relu',
                          border_mode='same', W_regularizer=l2reg,
                          b_regularizer=l2reg)(conv5)
    conv5 = core.Dropout(dropout)(conv5)

    flat = core.Flatten()(conv5)
    dense = core.Dense(256, activation='relu')(flat)
    dense = core.Dense(16, activation='relu')(dense)

    # Auxiliary Inputs
    aux_inputs_list = []
    for label in input_labels:
        if not label == "images":
            aux_inputs_list.append(
                Input((trainX[label].shape[1], ), name=label))
    inputs_list = [image_input]
    for element in aux_inputs_list:
        inputs_list.append(element)

    merge_list = [dense] + aux_inputs_list
    merge_layer = merge(merge_list, mode='concat', concat_axis=1,
                        name="merging")
    dense_final = core.Dense(128, activation='relu',
                             name="final_1")(merge_layer)
    dense_final = core.Dropout(dropout)(dense_final)
    dense_final = core.Dense(64, activation='relu',
                             name="final_2")(dense_final)
    dense_final = core.Dropout(dropout)(dense_final)
    prediction = core.Dense(trainY.shape[1], activation='softmax',
                            name="output")(dense_final)

    model = Model(input=inputs_list, output=prediction)
    model.summary()
    model.compile(optimizer=opt,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    hist = model.fit(trainX, trainY,
                     batch_size=batch_size,
                     nb_epoch=nb_epoch,
                     verbose=1,
                     callbacks=model_callbacks,
                     validation_data=(valX, valY))

    ################### metrics reporting ###################
    val_loss, val_acc = model.evaluate(valX, valY, verbose=0)
    name_file_save = 'final_model'
    keras_model_save(model, modelDir, name_file_save)
    return {'loss': val_loss, 'status': STATUS_OK}
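# --- Usage sketch (not part of the original script) ---
# The return value follows the hyperopt objective convention
# ({'loss': ..., 'status': STATUS_OK}), so this function can back a
# hyperopt/hyperas search. Below is a hypothetical direct call with dummy
# data: input_labels is set here only for illustration ("speed" is an
# invented auxiliary input), and the project-local helpers (find_argument,
# mkdir, WeightsGIF, FileMonitor, keras_model_save) are assumed importable.
if __name__ == '__main__':
    import numpy as np
    input_labels = ["images", "speed"]  # hypothetical; normally set by the project
    trainX = {"images": np.random.rand(32, 64, 80, 3).astype('float32'),
              "speed": np.random.rand(32, 1).astype('float32')}
    trainY = np.eye(3)[np.random.randint(0, 3, 32)]  # 3 dummy classes
    valX = {"images": np.random.rand(8, 64, 80, 3).astype('float32'),
            "speed": np.random.rand(8, 1).astype('float32')}
    valY = np.eye(3)[np.random.randint(0, 3, 8)]
    result = model(trainX, trainY, valX, valY)
    print "validation loss: " + str(result['loss'])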
        val_loss, val_acc, val_f2_score = model.evaluate(
            X_val, y_val, verbose=1, batch_size=batch_size)
        list_test_loss.append([val_loss, val_acc, val_f2_score])
        # cast to float so the value is json serializable
        list_learning_rate.append(float(K.get_value(model.optimizer.lr)))
        print('Epoch %s/%s, Time: %s' % (e + 1, epochs, time.time() - start))
        model.save('./model/last-epoch-model.h5')

        d_log = {}
        d_log["batch_size"] = batch_size
        d_log["nb_epoch"] = epochs
        d_log["optimizer"] = optimizer.get_config()
        d_log["train_loss"] = list_train_loss
        d_log["test_loss"] = list_test_loss
        d_log["learning_rate"] = list_learning_rate

        json_file = os.path.join('./logs/experiment_Planet_Densenet.json')
        with open(json_file, 'w') as fp:
            json.dump(d_log, fp, indent=4, sort_keys=True)

    # for e in range(epochs):
    #     print("epoch %d" % e)
    #     for train_slice in train_slices[0]:
    #         X_train, y_train = load_train_data_slice(train_slice)
    #         X_train = preprocess(X_train)
    #         model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=1,
    #                   callbacks=[lrate, csv_logger, tensorboard])
def run_MURA(
        batch_size=8,  # number of samples per training step
        nb_epoch=12,  # number of training epochs
        depth=22,  # network depth
        nb_dense_block=4,  # number of dense blocks
        nb_filter=16,  # initial number of conv filters
        growth_rate=12,  # number of new filters added by each conv layer
        dropout_rate=0.2,  # dropout rate
        learning_rate=0.001,  # learning rate
        weight_decay=1E-4,  # weight decay
        plot_architecture=False  # whether to plot the network architecture
):

    ###################
    # Data processing #
    ###################

    im_size = 320  # resize images
    # absolute paths to the dataset
    path_train = '/home/yu/Documents/tensorflow/MURA/MURA-v1.1/train/XR_ELBOW'
    path_valid = '/home/yu/Documents/tensorflow/MURA/MURA-v1.1/valid/XR_ELBOW'
    X_train_path, Y_train = data_loader.load_path(root_path=path_train,
                                                  size=im_size)
    X_valid_path, Y_valid = data_loader.load_path(root_path=path_valid,
                                                  size=im_size)
    # load the validation images up front
    X_valid = data_loader.load_image(X_valid_path, im_size)
    Y_valid = np.asarray(Y_valid)

    nb_classes = 1
    img_dim = (im_size, im_size, 1)  # channels-last tuple

    ###################
    # Construct model #
    ###################

    # model is an instance of the Keras Model class
    model = densenet.DenseNet(nb_classes,
                              img_dim,
                              depth,
                              nb_dense_block,
                              growth_rate,
                              nb_filter,
                              dropout_rate=dropout_rate,
                              weight_decay=weight_decay)
    # Model output
    model.summary()

    # Build optimizer
    opt = Adam(lr=learning_rate, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
    model.compile(
        loss='binary_crossentropy',
        optimizer=opt,  # optimizer used to update the weights
        metrics=["accuracy"])

    if plot_architecture:
        from keras.utils import plot_model
        plot_model(model, to_file='./figures/densenet_archi.png',
                   show_shapes=True)

    ####################
    # Network training #
    ####################

    print("Start Training")

    list_train_loss = []
    list_valid_loss = []
    list_learning_rate = []
    # best so far: [valid loss, valid acc, train/valid loss gap, train/valid acc gap]
    best_record = [100, 0, 100, 100]
    start_time = datetime.datetime.now()

    for e in range(nb_epoch):
        # step-wise learning-rate schedule
        if e == int(0.25 * nb_epoch):
            K.set_value(model.optimizer.lr, np.float32(learning_rate / 10.))
        if e == int(0.5 * nb_epoch):
            K.set_value(model.optimizer.lr, np.float32(learning_rate / 50.))
        if e == int(0.75 * nb_epoch):
            K.set_value(model.optimizer.lr, np.float32(learning_rate / 100.))

        split_size = batch_size
        # integer division: how many batches of training images
        num_splits = len(X_train_path) // split_size
        arr_all = np.arange(len(X_train_path)).astype(int)  # all training indices
        # reshuffle so each epoch sees a different batch order (SGD-style),
        # which helps avoid a poor local optimum
        random.shuffle(arr_all)
        # divide the training indices into num_splits batches
        arr_splits = np.array_split(arr_all, num_splits)

        l_train_loss = []
        batch_train_loss = []
        start = datetime.datetime.now()

        # i: batch counter, batch_idx: indices of the current batch
        for i, batch_idx in enumerate(arr_splits):
            # X_batch_path holds image paths, Y_batch the labels
            X_batch_path, Y_batch = [], []
            for idx in batch_idx:
                X_batch_path.append(X_train_path[idx])
                Y_batch.append(Y_train[idx])
            # load the batch images for training
            X_batch = data_loader.load_image(Path=X_batch_path, size=im_size)
            Y_batch = np.asarray(Y_batch)  # labels as an array
            # one gradient step; returns loss and accuracy
            train_logloss, train_acc = model.train_on_batch(X_batch, Y_batch)
            l_train_loss.append([train_logloss, train_acc])
            batch_train_loss.append([train_logloss, train_acc])
            if i % 100 == 0:  # report every 100 batches
                loss_1, acc_1 = np.mean(np.array(l_train_loss), 0)
                loss_2, acc_2 = np.mean(np.array(batch_train_loss), 0)
                batch_train_loss = []
                print(
                    '[Epoch {}/{}] [Batch {}/{}] [Time: {}] [all_batchs--> train_epoch_logloss: {:.5f}, train_epoch_acc:{:.5f}] '
                    .format(e + 1, nb_epoch, i, len(arr_splits),
                            datetime.datetime.now() - start, loss_1, acc_1),
                    '[this_100_batchs-->train_batchs_logloss: {:.5f}, train_batchs_acc:{:.5f}]'
                    .format(loss_2, acc_2))

        # validate
        valid_logloss, valid_acc = model.evaluate(X_valid,
                                                  Y_valid,
                                                  verbose=0,
                                                  batch_size=64)
        list_train_loss.append(np.mean(np.array(l_train_loss), 0).tolist())
        list_valid_loss.append([valid_logloss, valid_acc])
        # cast to float so the value is json serializable
        list_learning_rate.append(float(K.get_value(model.optimizer.lr)))
        print('[Epoch %s/%s] [Time: %s, Total_time: %s]' %
              (e + 1, nb_epoch, datetime.datetime.now() - start,
               datetime.datetime.now() - start_time),
              end='')
        print(
            '[train_loss_and_acc:{:.5f} {:.5f}] [valid_loss_acc:{:.5f} {:.5f}]'
            .format(list_train_loss[-1][0], list_train_loss[-1][1],
                    list_valid_loss[-1][0], list_valid_loss[-1][1]))

        d_log = {}
        d_log["batch_size"] = batch_size
        d_log["nb_epoch"] = nb_epoch
        d_log["optimizer"] = opt.get_config()
        d_log["train_loss"] = list_train_loss
        d_log["valid_loss"] = list_valid_loss
        d_log["learning_rate"] = list_learning_rate

        json_file = os.path.join('./log/experiment_log_MURA.json')
        with open(json_file, 'w') as fp:
            json.dump(d_log, fp, indent=4, sort_keys=True)

        record = [
            valid_logloss,
            valid_acc,
            abs(valid_logloss - list_train_loss[-1][0]),
            abs(valid_acc - list_train_loss[-1][1]),
        ]
        if record[0] <= best_record[0] and record[1] >= best_record[1]:
            # after a quarter of the epochs, also require the train/valid gaps to shrink
            if e <= int(0.25 * nb_epoch) or (record[2] <= best_record[2]
                                             and record[3] <= best_record[3]):
                best_record = record
                print('saving the best model:epoch', e + 1, best_record)
                model.save('save_models/best_MURA_modle@epochs{}.h5'.format(e + 1))
        model.save('save_models/MURA_modle@epochs{}.h5'.format(e + 1))
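# --- Usage sketch (not part of the original script) ---
# Hypothetical entry point: this variant carries its own defaults, so a bare
# call suffices; adjust path_train/path_valid inside run_MURA for a different
# MURA body part.
if __name__ == '__main__':
    run_MURA(nb_epoch=12, plot_architecture=False)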