def fit_model(X_train, y_train, X_val, y_val, G):
    epochs = 5
    es_patience = 5
    lr_patience = 3
    dropout1 = None
    depth = 25  # 40
    nb_dense_block = 3
    nb_filter = 18
    growth_rate = 18
    lr = 3E-1
    weight_file = 'keras_densenet_simple_wt_30Sept.h5'
    bn = True
    reduction_ = 0.5
    nb_classes = 1
    img_dim = (2, 96, 96)
    n_channels = 2

    model = DenseNet(depth=depth, nb_dense_block=nb_dense_block,
                     growth_rate=growth_rate, nb_filter=nb_filter,
                     dropout_rate=dropout1, activation='sigmoid',
                     input_shape=img_dim, include_top=True, bottleneck=bn,
                     reduction=reduction_, classes=nb_classes, pooling='avg',
                     weights=None)
    model.summary()

    opt = Adam(lr=lr)  # defined but unused below; compile() uses Adadelta()
    parallel_model = multi_gpu_model(model, gpus=G)
    parallel_model.compile(loss=binary_crossentropy, optimizer=Adadelta(),
                           metrics=['accuracy'])

    es = EarlyStopping(monitor='val_loss', patience=es_patience, verbose=1)
    # es = EarlyStopping(monitor='val_acc', patience=es_patience, verbose=1, restore_best_weights=True)
    checkpointer = ModelCheckpoint(filepath=weight_file, verbose=1, save_best_only=True)
    lr_reducer = ReduceLROnPlateau(monitor='val_loss', factor=np.sqrt(0.1), cooldown=0,
                                   patience=lr_patience, min_lr=0.5e-6, verbose=1)

    parallel_model.fit(X_train, y_train, batch_size=64 * G, epochs=epochs,
                       callbacks=[es, lr_reducer, checkpointer],
                       validation_data=(X_val, y_val), verbose=2)

    score, acc = parallel_model.evaluate(X_val, y_val)
    print('current Test accuracy:', acc)
    pred = parallel_model.predict(X_val)
    auc_score = roc_auc_score(y_val, pred)
    print("current auc_score ------------------> ", auc_score)

    """model = load_model(weight_file)  # This is the best model
    score, acc = model.evaluate(X_val, y_val)
    print('Best saved model Test accuracy:', acc)
    pred = model.predict(X_val)
    auc_score = roc_auc_score(y_val, pred)
    print("best saved model auc_score ------------------> ", auc_score)"""

    return auc_score, parallel_model
L_R_new, nt_order)
model1 = Sequential()
model1.add(Dense(40, input_dim=2 * k * (nb_classes), init='he_normal'))
model1.add(Activation('relu'))
model1.add(Dense(40, init='he_normal'))
model1.add(Activation('relu'))
model1.add(Dense(40, init='he_normal'))
model1.add(Activation('relu'))
model1.add(Dense(256, init='he_normal'))
model1.add(Activation('softmax'))

rms = RMSprop(lr=0.001, rho=0.9, epsilon=1e-06, clipnorm=1.5)
adagrad = Adagrad(clipnorm=1.5)
adam = Adam()
adadelta = Adadelta()
sgd = SGD(lr=0.01, decay=1e-6, momentum=0.95, nesterov=True, clipnorm=1.0)

model1.compile(loss='poisson', optimizer=adam)
model1.fit(C, Y, nb_epoch=50, batch_size=64000, show_accuracy=False, verbose=1)

f_out1 = open("%s_1.fasta" % name_out, "w")
def pass_arg(Xx, nsim, tr_size, droprate): print("Tr_size:", tr_size) def fix_seeds(seed): random.seed(seed) np.random.seed(seed) tf.random.set_seed(seed) session_conf = tf.compat.v1.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1) sess = tf.compat.v1.Session(graph=tf.compat.v1.get_default_graph(), config=session_conf) # K.set_session(sess) tf.compat.v1.keras.backend.set_session(sess) ss = 1 fix_seeds(ss) # MC dropout class MCDropout(Dropout): def call(self, inputs, training=None): return super(MCDropout, self).call(inputs, training=True) # import pickle # def save_obj(obj, name): # with open(name, 'wb') as f: # pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL) # Compute the RMSE given the ground truth (y_true) and the predictions(y_pred) def root_mean_squared_error(y_true, y_pred): return K.sqrt(K.mean(K.square(y_pred - y_true), axis=-1)) # Making sure dimensionless bond length is less than 1 def bond(bl): return tf.add(K.relu(tf.negative(bl)), K.relu(bl - 1.0)) # Making sure final porosity is less than initial def poros(poroi, porof): return K.relu(tf.negative(porof)) + K.relu(porof - poroi) def strength1(bl, porof, nlayer=6): sigma01, sigma02, C1s = 6, 31, 21 sigma_long = sigma01 * (K.exp( (1.0 - porof)**(C1s * nlayer)) - porof) + sigma02 * (1.0 - porof) n = K.shape(sigma_long)[0] sorted_strength, sortedIndices = tf.math.top_k(sigma_long, n, True) sorted_bl = K.gather(bl, sortedIndices) sorted_porof = K.gather(porof, sortedIndices) argg = tf.argsort(sorted_bl, axis=-1, direction='DESCENDING', stable=False, name=None) sorted_bl_corr = K.gather(sorted_bl, argg) return sorted_bl_corr - sorted_bl def strength2(bl, porof, nlayer=6): sigma01, sigma02, C1s = 6, 31, 21 sigma_long = sigma01 * (K.exp( (1.0 - porof)**(C1s * nlayer)) - porof) + sigma02 * (1.0 - porof) n = K.shape(sigma_long)[0] sorted_strength, sortedIndices = tf.math.top_k(sigma_long, n, True) sorted_bl = K.gather(bl, sortedIndices) n = K.cast(n, tf.float32) rel = K.relu(sorted_bl[1:] - sorted_bl[0:-1]) num_vio = K.cast(tf.math.count_nonzero(rel), tf.float32) return num_vio / n def phy_loss_mean(params): # useful for cross-checking training loss1, loss2, loss3, loss4, lam1, lam2, lam3, lam4 = params def loss(y_true, y_pred): # return lam1*K.mean(K.relu(loss1)) + lam2*K.mean(K.relu(loss2)) + lam2*K.mean(K.relu(loss3)) return lam1 * K.mean(K.relu(loss1)) + lam2 * K.mean( K.relu(loss2)) + lam3 * K.mean(K.relu(loss3)) + lam4 * loss4 return loss #function to calculate the combined loss = sum of rmse and phy based loss def combined_loss(params): loss1, loss2, loss3, loss4, lam1, lam2, lam3, lam4 = params def loss(y_true, y_pred): # return mean_squared_error(y_true, y_pred) + lam1 * K.mean(K.relu(loss1)) + lam2 * K.mean(K.relu(loss2)) + lam2 * K.mean(K.relu(loss3)) return mean_squared_error(y_true, y_pred) + lam1 * K.mean( K.relu(loss1)) + lam2 * K.mean(K.relu(loss2)) + lam3 * K.mean( K.relu(loss3)) + lam4 * loss4 return loss def PGNN_train_test(optimizer_name, optimizer_val, use_YPhy, pre_train, tr_size, lamda, iteration, n_nodes, n_layers, drop_frac, reg, samp): # fix_seeds(ss) # Hyper-parameters of the training process # batch_size = tr_size batch_size = 1 num_epochs = 300 val_frac = 0.25 patience_val = 80 # Initializing results filename exp_name = "DNN_pre_loss_" + pre_train + optimizer_name + '_trsize' + str( tr_size) + '_lamda' + str(lamda) + '_iter' + str(iteration) exp_name = exp_name.replace('.', 'pt') results_dir = '../results/' model_name = results_dir + exp_name + '_model.h5' # storing the trained model 
if reg == True and samp == 25: results_name = results_dir + exp_name + '_results_25_regularizer.dat' # storing the results of the model elif reg == False and samp == 25: results_name = results_dir + exp_name + '_results_25.dat' # storing the results of the model elif reg == True and samp == 1519: results_name = results_dir + exp_name + '_results_1519_regularizer.dat' # storing the results of the model elif reg == False and samp == 1519: results_name = results_dir + exp_name + '_results_1519.dat' # storing the results of the model # Load labeled data data = np.loadtxt('../data/labeled_data.dat') x_labeled = data[:, : 2] # -2 because we do not need porosity predictions y_labeled = data[:, -3: -1] # dimensionless bond length and porosity measurements if samp == 25: data = np.loadtxt('../data/unlabeled_data_BK_constw_v2_25.dat') x_unlabeled = data[:, :] elif samp == 1519: data = np.loadtxt('../data/unlabeled_data_BK_constw_v2_1525.dat') x_unlabeled = data[:, :] x_unlabeled1 = x_unlabeled[:1303, :] x_unlabeled2 = x_unlabeled[-6:, :] x_unlabeled = np.vstack((x_unlabeled1, x_unlabeled2)) # initial porosity init_poro = x_unlabeled[:, -1] x_unlabeled = x_unlabeled[:, :2] # normalize dataset with MinMaxScaler scaler = preprocessing.MinMaxScaler(feature_range=(0.0, 1.0)) x_labeled = scaler.fit_transform(x_labeled) x_unlabeled = scaler.fit_transform(x_unlabeled) # y_labeled = scaler.fit_transform(y_labeled) # # initial porosity & physics outputs are removed # x_unlabeled = x_unlabeled[:, :-3] # train and test data trainX, trainY = x_labeled[:tr_size, :], y_labeled[:tr_size] # testX, testY = x_labeled[tr_size:,:], y_labeled[tr_size:] testX, testY = x_labeled[tr_size:, :], y_labeled[tr_size:] if use_YPhy == 0: # Removing the last column from x_unlabeled (corresponding to Y_PHY) x_unlabeled = x_unlabeled[:, :-1] dependencies = {'root_mean_squared_error': root_mean_squared_error} # load the pre-trained model using non-calibrated physics-based model predictions (./data/unlabeled.dat) loaded_model = load_model(results_dir + pre_train, custom_objects=dependencies) # Creating the model model = Sequential() for layer in np.arange(n_layers): if layer == 0: model.add( Dense(n_nodes, activation='relu', input_shape=(np.shape(trainX)[1], ))) else: if reg: model.add( Dense(n_nodes, activation='relu', kernel_regularizer=l1_l2(l1=.001, l2=.001))) else: model.add(Dense(n_nodes, activation='relu')) model.add(Dropout(rate=drop_frac)) # model.add(MCDropout(rate=drop_frac)) model.add(Dense(2, activation='linear')) # pass the weights to all layers but 1st input layer, whose dimensions are updated for new_layer, layer in zip(model.layers[1:], loaded_model.layers[1:]): new_layer.set_weights(layer.get_weights()) # physics-based regularization uinp_sc = K.constant(value=x_unlabeled) # unlabeled input data lam1 = K.constant(value=lamda[0]) # regularization hyper-parameter lam2 = K.constant(value=lamda[1]) # regularization hyper-parameter lam3 = K.constant(value=lamda[2]) # regularization hyper-parameter lam4 = K.constant(value=lamda[3]) # regularization hyper-parameter predictions = model(uinp_sc) # model output at depth i # porosity = K.relu(predictions[:,1]) phyloss1 = bond(predictions[:, 0]) # physics loss 1 # uinp = K.constant(value=x_unlabeled_non) # unlabeled input data phyloss2 = poros(init_poro, predictions[:, 1]) # physics loss 1 phyloss3 = strength1(predictions[:, 0], predictions[:, 1]) phyloss4 = strength2(predictions[:, 0], predictions[:, 1]) totloss = combined_loss( [phyloss1, phyloss2, phyloss3, phyloss4, lam1, 
lam2, lam3, lam4]) phyloss = phy_loss_mean( [phyloss1, phyloss2, phyloss3, phyloss4, lam1, lam2, lam3, lam4]) model.compile(loss=totloss, optimizer=optimizer_val, metrics=[phyloss, root_mean_squared_error]) early_stopping = EarlyStopping(monitor='val_loss', patience=patience_val, verbose=1) # print('Running...' + optimizer_name) history = model.fit(trainX, trainY, batch_size=batch_size, epochs=num_epochs, verbose=0, validation_split=val_frac, callbacks=[early_stopping, TerminateOnNaN()]) # early_stopping = EarlyStopping(monitor='loss', patience=patience_val, verbose=1) # history = model.fit(trainX, trainY, # batch_size=batch_size, # epochs=num_epochs, # verbose=1, # callbacks=[early_stopping, TerminateOnNaN()]) # test_score = model.evaluate(testX, testY, verbose=0) # predictions = model.predict(x_labeled) # model output at depth i # print(np.sort(predictions[:,0], axis=0)) # predictions = model.predict(x_unlabeled) # model output at depth i # print(np.sort(predictions[:,0], axis=0)) # print('iter: ' + str(iteration) + ' useYPhy: ' + str(use_YPhy) + # ' nL: ' + str(n_layers) + ' nN: ' + str(n_nodes) + # ' lamda1: ' + str(lamda[0]) + ' lamda2: ' + str(lamda[1]) + ' trsize: ' + str(tr_size) + # ' TestRMSE: ' + str(test_score[2]) + ' PhyLoss: ' + str(test_score[1]), ' TestLoss: ' + str(test_score[0]), "\n") # # print('iter: ' + str(iteration) + ' TestRMSE: ' + str(test_score[2]) + ' PhyLoss: ' + str(test_score[1]), "\n") # # model.save(model_name) # # save results # results = {'train_loss_1':history.history['loss_1'], # 'val_loss_1':history.history['val_loss_1'], # 'train_rmse':history.history['root_mean_squared_error'], # 'val_rmse':history.history['val_root_mean_squared_error'], # 'test_rmse':test_score[2], # 'PhyLoss':test_score[1]} # results = {'train_loss_1':history.history['loss_1'], # 'train_rmse':history.history['root_mean_squared_error'], # 'test_rmse':test_score[2], # 'PhyLoss':test_score[1]} # save_obj(results, results_name) # predictions = model.predict(testX) # return results, results_name, predictions, testY, test_score[2], trainY test_score = model.evaluate(testX, testY, verbose=1) print(test_score) samples = [] for i in range(int(nsim)): print("simulation num:", i) predictions = model.predict(Xx) predictions = predictions[:, 1] samples.append(predictions) return np.array(samples) # Main Function if __name__ == '__main__': fix_seeds(1) # List of optimizers to choose from optimizer_names = [ 'Adagrad', 'Adadelta', 'Adam', 'Nadam', 'RMSprop', 'SGD', 'NSGD' ] optimizer_vals = [ Adagrad(clipnorm=1), Adadelta(clipnorm=1), Adam(clipnorm=1), Nadam(clipnorm=1), RMSprop(clipnorm=1), SGD(clipnorm=1.), SGD(clipnorm=1, nesterov=True) ] # selecting the optimizer optimizer_num = 1 optimizer_name = optimizer_names[optimizer_num] optimizer_val = optimizer_vals[optimizer_num] # Selecting Other Hyper-parameters drop_frac = droprate # Fraction of nodes to be dropped out use_YPhy = 1 # Whether YPhy is used as another feature in the NN model or not n_layers = 2 # Number of hidden layers n_nodes = 5 # Number of nodes per hidden layer # pre-trained model pre_train = 'Pre-trainAdadelta_drop0_nL2_nN5_trsize1308_iter0.h5' #set lamda lamda = [0.3, 0.15, 0.008, 0] # Physics-based regularization constant # # Iterating over different training fractions and splitting indices for train-test splits # trsize_range = [4,6,8,10,20] # #default training size = 5000 # tr_size = trsize_range[4] tr_size = int(tr_size) # use regularizer reg = True # sample size used samp = 1519 # samp = 25 # total number of runs 
iter_range = np.arange(1) testrmse = [] # iterating through all possible params for iteration in iter_range: # results, result_file, pred, obs, rmse, obs_train = PGNN_train_test(optimizer_name, optimizer_val, drop_frac, use_YPhy, # iteration, n_layers, n_nodes, tr_size, lamda, reg, samp) # testrmse.append(rmse) pred = PGNN_train_test(optimizer_name, optimizer_val, use_YPhy, pre_train, tr_size, lamda, iteration, n_nodes, n_layers, drop_frac, reg, samp) return np.squeeze(pred)
out = IdentityBlock(out, [64, 64, 96])
out = MaxPooling2D(pool_size=[1, 2], strides=[1, 2], padding='valid')(out)  # 10x3
out = Flatten()(out)
out = Dropout(0.7)(out)
out = Dense(60, activation='relu')(out)
out = Dense(2, activation='softmax')(out)
#################################################################################################################
model = Model(inputs=[input], outputs=out)

#### optimizer, loss
sgd = SGD(lr=0.0002, decay=5e-2, momentum=0.9, nesterov=True)
adadelta = Adadelta(lr=1.0, rho=0.95, epsilon=1e-08, decay=0.0)  #### best
#rmsprop = RMSprop(lr=0.001, rho=0.9, epsilon=1e-08, decay=0.0)
#adagrad = Adagrad(lr=0.01, epsilon=1e-08, decay=0.0)
#adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
#adamax = Adamax(lr=0.002, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
#nadam = Nadam(lr=0.002, beta_1=0.9, beta_2=0.999, epsilon=1e-08, schedule_decay=0.004)

model.compile(loss='categorical_crossentropy', optimizer=adadelta, metrics=['accuracy'])

for i, layer in enumerate(model.layers):
    print(i, layer.name)
print('\n model.summary: \n')
model.summary()

checkpointer = ModelCheckpoint(filepath='/home/jidian/chen_dnn/Models/weights6_10x400_Factory.hdf5',
                               monitor='val_loss', verbose=1,
def pass_arg(Xx, nsim, tr_size): print("Tr_size:", tr_size) def fix_seeds(seed): random.seed(seed) np.random.seed(seed) tf.random.set_seed(seed) session_conf = tf.compat.v1.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1) sess = tf.compat.v1.Session(graph=tf.compat.v1.get_default_graph(), config=session_conf) # K.set_session(sess) tf.compat.v1.keras.backend.set_session(sess) ss = 1 fix_seeds(ss) # MC dropout class MCDropout(Dropout): def call(self, inputs, training=None): return super(MCDropout, self).call(inputs, training=True) # import pickle # def save_obj(obj, name): # with open(name, 'wb') as f: # pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL) # Compute the RMSE given the ground truth (y_true) and the predictions(y_pred) def root_mean_squared_error(y_true, y_pred): return K.sqrt(K.mean(K.square(y_pred - y_true), axis=-1)) #function for computing the density given the temperature(nx1 matrix) def density(temp): return 1000 * (1 - (temp + 288.9414) * (temp - 3.9863)**2 / (508929.2 * (temp + 68.12963))) def phy_loss_mean(params): # useful for cross-checking training udendiff, lam = params def loss(y_true, y_pred): return K.mean(K.relu(udendiff)) return loss #function to calculate the combined loss = sum of rmse and phy based loss def combined_loss(params): udendiff, lam = params def loss(y_true, y_pred): return mean_squared_error(y_true, y_pred) + lam * K.mean(K.relu(udendiff)) return loss def PGNN_train_test(optimizer_name, optimizer_val, use_YPhy, pre_train, tr_size, lamda, iteration, n_nodes, n_layers, drop_frac, reg, lake_name): # fix_seeds(ss) # Hyper-parameters of the training process # batch_size = tr_size batch_size = 1000 num_epochs = 1000 val_frac = 0.2 patience_val = 100 # Initializing results filename exp_name = "DNN_pre_loss_" + pre_train + optimizer_name + '_trsize' + str( tr_size) + '_lamda' + str(lamda) + '_iter' + str(iteration) exp_name = exp_name.replace('.', 'pt') results_dir = '../../../../results/Lake/' model_name = results_dir + exp_name + '_model.h5' # storing the trained model # Load features (Xc) and target values (Y) data_dir = '../../../../data/' filename = lake_name + '.mat' mat = spio.loadmat(data_dir + filename, squeeze_me=True, variable_names=['Y', 'Xc_doy', 'Modeled_temp']) Xc = mat['Xc_doy'] Y = mat['Y'] Xc = Xc[:, :-1] # remove Y_phy, physics model outputs # train and test data trainX, testX, trainY, testY = train_test_split( Xc, Y, train_size=tr_size / Xc.shape[0], test_size=tr_size / Xc.shape[0], random_state=42, shuffle=True) ## train and test data #trainX, trainY = Xc[:tr_size,:], Y[:tr_size] #testX, testY = Xc[-50:,:], Y[-50:] # Loading unsupervised data unsup_filename = lake_name + '_sampled.mat' unsup_mat = spio.loadmat(data_dir + unsup_filename, squeeze_me=True, variable_names=['Xc_doy1', 'Xc_doy2']) uX1 = unsup_mat[ 'Xc_doy1'] # Xc at depth i for every pair of consecutive depth values uX2 = unsup_mat[ 'Xc_doy2'] # Xc at depth i + 1 for every pair of consecutive depth values #uX1 = uX1[:50000,:] #uX2 = uX2[:50000,:] uX1 = uX1[range(0, 649723, 51), :] uX2 = uX2[range(0, 649723, 51), :] if use_YPhy == 0: # Removing the last column from uX (corresponding to Y_PHY) uX1 = uX1[:, :-1] uX2 = uX2[:, :-1] dependencies = {'root_mean_squared_error': root_mean_squared_error} # load the pre-trained model using non-calibrated physics-based model predictions (./data/unlabeled.dat) loaded_model = load_model(results_dir + pre_train, custom_objects=dependencies) # Creating the model model = Sequential() for layer in np.arange(n_layers): if 
layer == 0: model.add( Dense(n_nodes, activation='relu', input_shape=(np.shape(trainX)[1], ))) else: if reg: model.add( Dense(n_nodes, activation='relu', kernel_regularizer=l1_l2(l1=.00, l2=.00))) else: model.add(Dense(n_nodes, activation='relu')) # model.add(Dropout(rate=drop_frac)) model.add(MCDropout(rate=drop_frac)) model.add(Dense(1, activation='linear')) # pass the weights to all layers but 1st input layer, whose dimensions are updated for new_layer, layer in zip(model.layers[1:], loaded_model.layers[1:]): new_layer.set_weights(layer.get_weights()) # physics-based regularization uin1 = K.constant(value=uX1) # input at depth i uin2 = K.constant(value=uX2) # input at depth i + 1 lam = K.constant(value=lamda) # regularization hyper-parameter uout1 = model(uin1) # model output at depth i uout2 = model(uin2) # model output at depth i + 1 udendiff = ( density(uout1) - density(uout2) ) # difference in density estimates at every pair of depth values totloss = combined_loss([udendiff, lam]) phyloss = phy_loss_mean([udendiff, lam]) model.compile(loss=totloss, optimizer=optimizer_val, metrics=[phyloss, root_mean_squared_error]) early_stopping = EarlyStopping(monitor='val_loss', patience=patience_val, verbose=1) # print('Running...' + optimizer_name) history = model.fit(trainX, trainY, batch_size=batch_size, epochs=num_epochs, verbose=0, validation_split=val_frac, callbacks=[early_stopping, TerminateOnNaN()]) test_score = model.evaluate(testX, testY, verbose=1) print(test_score) # scale the uniform numbers to original space # max and min value in each column max_in_column_Xc = np.max(trainX, axis=0) min_in_column_Xc = np.min(trainX, axis=0) # Xc_scaled = (Xc-min_in_column_Xc)/(max_in_column_Xc-min_in_column_Xc) Xc_org = Xx * (max_in_column_Xc - min_in_column_Xc) + min_in_column_Xc samples = [] for i in range(int(nsim)): #print("simulation num:",i) predictions = model.predict(Xc_org) samples.append(predictions) return np.array(samples) # Main Function if __name__ == '__main__': fix_seeds(1) # List of optimizers to choose from optimizer_names = [ 'Adagrad', 'Adadelta', 'Adam', 'Nadam', 'RMSprop', 'SGD', 'NSGD' ] optimizer_vals = [ Adagrad(clipnorm=1), Adadelta(clipnorm=1), Adam(clipnorm=1), Nadam(clipnorm=1), RMSprop(clipnorm=1), SGD(clipnorm=1.), SGD(clipnorm=1, nesterov=True) ] # selecting the optimizer optimizer_num = 2 optimizer_name = optimizer_names[optimizer_num] optimizer_val = optimizer_vals[optimizer_num] # Selecting Other Hyper-parameters drop_frac = 0.1 # Fraction of nodes to be dropped out use_YPhy = 0 # Whether YPhy is used as another feature in the NN model or not n_layers = 2 # Number of hidden layers n_nodes = 15 # Number of nodes per hidden layer # pre-trained model pre_train = 'Scaled_Lake_Pre-trainAdam_drop0pt1_nL2_nN15_trsize600000_iter0.h5' #set lamda lamda = 10 # Physics-based regularization constant tr_size = int(tr_size) # use regularizer reg = True # total number of runs iter_range = np.arange(1) #List of lakes to choose from lake = ['mendota', 'mille_lacs'] lake_num = 0 # 0 : mendota , 1 : mille_lacs lake_name = lake[lake_num] # total number of runs iter_range = np.arange(1) testrmse = [] # iterating through all possible params for iteration in iter_range: # results, result_file, pred, obs, rmse, obs_train = PGNN_train_test(optimizer_name, optimizer_val, drop_frac, use_YPhy, # iteration, n_layers, n_nodes, tr_size, lamda, reg, samp) # testrmse.append(rmse) pred = PGNN_train_test(optimizer_name, optimizer_val, use_YPhy, pre_train, tr_size, lamda, iteration, n_nodes, 
n_layers, drop_frac, reg, lake_name) return np.squeeze(pred)
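# The lake-temperature snippet above builds its training objective as a standard MSE plus a physics
# penalty: water density computed from the prediction at depth i should not exceed the density at
# depth i + 1. A minimal, self-contained sketch of that composite loss (assuming tf.keras; `uout1`
# and `uout2` here are stand-in constant tensors, not the model outputs used above):
import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.losses import mean_squared_error


def density(temp):
    # same water-density polynomial as in the snippet above
    return 1000 * (1 - (temp + 288.9414) * (temp - 3.9863) ** 2 / (508929.2 * (temp + 68.12963)))


def make_combined_loss(udendiff, lam):
    # MSE on labeled data + lam * mean(relu(density(i) - density(i+1))) on unlabeled depth pairs
    def loss(y_true, y_pred):
        return mean_squared_error(y_true, y_pred) + lam * K.mean(K.relu(udendiff))
    return loss


# toy check of the penalty term with hand-made depth-pair predictions
uout1 = tf.constant([[10.0], [12.0]])  # predicted temperature at depth i
uout2 = tf.constant([[11.0], [9.0]])   # predicted temperature at depth i + 1
udendiff = density(uout1) - density(uout2)
loss_fn = make_combined_loss(udendiff, lam=10.0)
print(float(K.mean(K.relu(udendiff))))  # > 0 only when the density ordering is violated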
def build_model(input_shape, num_classes, weights='imagenet', opt=None):
    # create the base pre-trained model
    base_model = ResNet50(weights=weights, include_top=False, input_shape=input_shape,
                          backend=keras.backend, layers=keras.layers,
                          models=keras.models, utils=keras.utils)

    # add a global spatial average pooling layer
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(1024, activation='relu', name='fc2014_1')(x)
    x = Dropout(0.5)(x)
    x = Dense(1024, activation='relu', name='fc2014_2')(x)
    x = Dropout(0.5)(x)
    x = Dense(num_classes, activation='sigmoid', name='fc28')(x)

    # this is the model we will train
    model = Model(inputs=base_model.input, outputs=x, name='pre_trained_resnet50')

    optimizer = None
    if opt == 0:
        optimizer = SGD(lr=0.01, momentum=0.9, nesterov=True, decay=1e-06)
    elif opt == 1:
        optimizer = RMSprop(decay=1e-06)
    elif opt == 2:
        optimizer = Adagrad(decay=1e-06)
    elif opt == 3:
        optimizer = Adadelta(decay=1e-06)
    elif opt == 4:
        optimizer = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=1e-06, amsgrad=False)
    elif opt == 5:
        optimizer = Adamax(decay=1e-06)
    elif opt == 6:
        optimizer = Adam(amsgrad=True, decay=1e-06)
    elif opt == 7:
        optimizer = Adam(lr=0.0001, amsgrad=True, decay=1e-06)

    # compile the model (should be done *after* setting layers to non-trainable)
    model.compile(optimizer=optimizer, loss=binary_crossentropy, metrics=['accuracy'])

    return model
def DRL_Reasoner(params): # hyper params max_story_len = params['max_story_len'] max_sent_len = params['max_sent_len'] max_q_num = params['max_story_len'] vocab_size = params['vocab_size'] + 1 dim_emb_story = params['dim_emb_story'] dim_emb_env = params['dim_emb_env'] dim_tracker = params['dim_tracker'] entity_num_act = params['ent_range'] relation_num_act = params['relation_num'] dim_q_h_ff = params['dim_q_h_ff'] dim_comp_ff = params['dim_comp_ff'] vocab_size_ans = params['vocab_size_ans'] vocab_size_entity = params['ent_range'] vocab_size_relation = params['relation_num'] tho = params['tho'] # Input Tensors story_input = Input(shape=(max_story_len, max_sent_len)) story_mask = Input(shape=(max_story_len, max_sent_len, dim_emb_story)) q_input = Input(shape=(max_q_num, max_sent_len)) q_mask = Input(shape=(max_q_num, max_sent_len, dim_emb_story)) mask_sim = Input(shape=(max_story_len, max_sent_len * dim_emb_story)) reward_value = Input(shape=(max_story_len, entity_num_act * 1 * relation_num_act)) reward_value_retr = Input(shape=(max_q_num, vocab_size_entity * 1 * vocab_size_relation)) embed_word = Embedding(vocab_size, dim_emb_story, input_length=max_sent_len) embed_seq = TimeDistributed(embed_word, input_shape=(max_story_len, max_sent_len), name='embed_seq') tb_emb = embed_seq(story_input) tb_m_emb = merge([tb_emb, story_mask], mode='mul') encode_single_story = Reshape((max_sent_len * dim_emb_story, ), input_shape=(max_sent_len, dim_emb_story)) encode_story = TimeDistributed(encode_single_story, input_shape=(max_story_len, max_sent_len, dim_emb_story), name='encode_story') sent_embs = encode_story(tb_m_emb) tb_emb_q = embed_seq(q_input) tb_m_emb_q = merge([tb_emb_q, q_mask], mode='mul') q_embs_raw = encode_story(tb_m_emb_q) q_embs = TimeDistributed(Dense(dim_tracker, activation='sigmoid'), input_shape=(max_q_num, max_sent_len * dim_emb_story))(q_embs_raw) # merge and mask sent_env_embs = sent_embs embs_masked = merge([sent_env_embs, mask_sim], mode='mul') # state tracker hiddens = TimeDistributed(Dense(dim_tracker, activation='sigmoid'), input_shape=(max_story_len, max_sent_len * dim_emb_story))(embs_masked) final_rnn_state = Reshape( (dim_tracker, ))(AveragePooling1D(max_story_len)(hiddens)) # policy distribution arg1_bind_raw = Tho(params['tho'])(TimeDistributed( Dense(entity_num_act), input_shape=(max_story_len, dim_tracker), name='arg1_bind_raw')(hiddens)) arg1_bind_soft = Reshape((max_story_len, entity_num_act, 1))(Activation('softmax')(arg1_bind_raw)) arg2_bind_raw = Tho(params['tho'])(TimeDistributed( Dense(1), input_shape=(max_story_len, dim_tracker), name='arg2_bind_raw')(hiddens)) arg2_bind_soft = Reshape( (max_story_len, 1, 1))(Activation('softmax')(arg2_bind_raw)) arg12_bind_list = [ Reshape( (1, entity_num_act * 1, 1))(merge([Slice(i)(arg1_bind_soft), Slice(i)(arg2_bind_soft)], mode='dot', dot_axes=(2, 1))) for i in range(max_story_len) ] arg12_bind = merge(arg12_bind_list, mode='concat', concat_axis=1) relate_bind_raw = Tho(params['tho'])(TimeDistributed( Dense(relation_num_act), input_shape=(max_story_len, dim_tracker), name='relate_bind_raw')(hiddens)) relate_bind_soft = Reshape( (max_story_len, 1, relation_num_act))(Activation('softmax')( relate_bind_raw)) # (, story_len, 1, relation_num) bind_probs_list = [ Reshape((1, entity_num_act * 1 * relation_num_act))(merge( [Slice(i)(arg12_bind), Slice(i)(relate_bind_soft)], mode='dot', dot_axes=(2, 1))) for i in range(max_story_len) ] bind_probs = merge(bind_probs_list, mode='concat', concat_axis=1) bind_probs_log = Lambda(lambda 
x: (-1) * (K.log(x) + K.epsilon()))( bind_probs) bind_probs_re = merge([bind_probs_log, reward_value], mode='mul', name='action_probs_re') # retrieve probs and answer generation states = RepeatVector(max_q_num)(final_rnn_state) q_state = merge([q_embs, states], mode='concat', concat_axis=2) retrieve_state = TimeDistributed(Dense(dim_q_h_ff, activation='sigmoid'), input_shape=(max_q_num, q_embs.shape[2]))(q_embs) arg1 = Activation('softmax')(Tho(params['tho'])(TimeDistributed( Dense(vocab_size_entity))(retrieve_state))) arg2 = Activation('softmax')(Tho(params['tho'])(TimeDistributed( Dense(1))(retrieve_state))) relation = Activation('softmax')(Tho(params['tho'])(TimeDistributed( Dense(vocab_size_relation))(retrieve_state))) # form the retrieve prob vector arg1_T = Reshape((max_q_num, vocab_size_entity, 1))(arg1) arg2_T = Reshape((max_q_num, 1, 1))(arg2) arg12_list = [ Reshape( (1, vocab_size_entity * 1, 1))(merge([Slice(i)(arg1_T), Slice(i)(arg2_T)], mode='dot', dot_axes=(2, 1))) for i in range(max_story_len) ] arg12 = merge(arg12_list, mode='concat', concat_axis=1) relat_T = Reshape((max_q_num, 1, vocab_size_relation))(relation) retrieve_probs_list = [ Reshape((1, vocab_size_entity * 1 * vocab_size_relation))( merge([Slice(i)(arg12), Slice(i)(relat_T)], mode='dot', dot_axes=(2, 1))) for i in range(max_story_len) ] retrieve_probs = merge(retrieve_probs_list, mode='concat', concat_axis=1) retrieve_probs_log = Lambda(lambda x: (-1) * (K.log(x) + K.epsilon()))( retrieve_probs) retrieve_probs_re = merge([retrieve_probs_log, reward_value_retr], mode='mul', name='retrieve_probs_re') # a complete model DRL_complete = Model(input=[ story_input, story_mask, q_input, q_mask, mask_sim, reward_value, reward_value_retr ], output=[bind_probs_re, retrieve_probs_re]) rmsp = RMSprop(clipnorm=2., lr=0.0001) sgd = SGD(clipnorm=100.) adad = Adadelta(clipnorm=10.) DRL_complete.compile( optimizer=rmsp, loss={ 'action_probs_re': dot_loss, 'retrieve_probs_re': dot_loss }, ) DRL_sim = Model(input=[ story_input, story_mask, q_input, q_mask, mask_sim, reward_value, reward_value_retr ], output=[bind_probs, retrieve_probs]) DRL_debug = Model(input=[ story_input, story_mask, q_input, q_mask, mask_sim, reward_value, reward_value_retr ], output=[ sent_embs, q_embs, hiddens, arg1_bind_raw, arg1_bind_soft, arg2_bind_raw, arg2_bind_soft, relate_bind_raw, relate_bind_soft, bind_probs, bind_probs_log, bind_probs_re, states, arg1, arg2, relation, retrieve_probs, retrieve_probs_log, retrieve_probs_re, ]) # a function to check gradients wrt. arbitrary weights. layer_name_interested = ['embed_seq'] weights = [] for s in layer_name_interested: weights.extend(DRL_complete.get_layer(s).trainable_weights) print weights gradients = DRL_complete.optimizer.get_gradients(DRL_complete.total_loss, weights) input_tensors = [] input_tensors.extend(DRL_complete.inputs) input_tensors.extend(DRL_complete.sample_weights) input_tensors.extend(DRL_complete.targets) input_tensors.append(K.learning_phase()) get_grad = K.function(inputs=input_tensors, outputs=gradients) return DRL_complete, DRL_sim, DRL_debug, get_grad
def creat_model(self): k = self.k input_data = Input( shape=[self.AUDIO_LENGTH, self.AUDIO_FEATURE_LENGTH, 1], name='Input') conv1 = Conv2D(filters=k * 2, kernel_size=[3, 3], padding='same', use_bias=True, kernel_initializer='he_normal')(input_data) conv1 = BatchNormalization()(conv1) conv1 = Activation(activation='relu')(conv1) x = MaxPooling2D(pool_size=[2, 1], strides=[2, 1])(conv1) b1_1 = self.dense_block(x, k) b1_1_conc = concatenate([x, b1_1], axis=-1) b1_2 = self.dense_block(b1_1_conc, k) b1_2_conc = concatenate([x, b1_1, b1_2], axis=-1) b1_3 = self.dense_block(b1_2_conc, k) b1_3_conc = concatenate([x, b1_1, b1_2, b1_3], axis=-1) b1_4 = self.dense_block(b1_3_conc, k) b1_4_conc = concatenate([x, b1_1, b1_2, b1_3, b1_4], axis=-1) b1_5 = self.dense_block(b1_4_conc, k) b1_5_conc = concatenate([x, b1_1, b1_2, b1_3, b1_4, b1_5], axis=-1) transion_1 = self.transition_layer(b1_5_conc, k) b2_1 = self.dense_block(transion_1, k) b2_1_conc = concatenate([transion_1, b2_1], axis=-1) b2_2 = self.dense_block(b2_1_conc, k) b2_2_conc = concatenate([transion_1, b2_1, b2_2], axis=-1) b2_3 = self.dense_block(b2_2_conc, k) b2_3_conc = concatenate([transion_1, b2_1, b2_2, b2_3], axis=-1) b2_4 = self.dense_block(b2_3_conc, k) b2_4_conc = concatenate([transion_1, b2_1, b2_2, b2_3, b2_4], axis=-1) b2_5 = self.dense_block(b2_4_conc, k) b2_5_conc = concatenate([transion_1, b2_1, b2_2, b2_3, b2_4, b2_5], axis=-1) transion_2 = self.transition_layer(b2_5_conc, k) b3_1 = self.dense_block(transion_2, k) b3_1_conc = concatenate([transion_2, b3_1], axis=-1) b3_2 = self.dense_block(b3_1_conc, k) b3_2_conc = concatenate([transion_2, b3_1, b3_2], axis=-1) b3_3 = self.dense_block(b3_2_conc, k) b3_3_conc = concatenate([transion_2, b3_1, b3_2, b3_3], axis=-1) b3_4 = self.dense_block(b3_3_conc, k) b3_4_conc = concatenate([transion_2, b3_1, b3_2, b3_3, b3_4], axis=-1) b3_5 = self.dense_block(b3_4_conc, k) b3_5_conc = concatenate([transion_2, b3_1, b3_2, b3_3, b3_4, b3_5], axis=-1) transion_3 = self.transition_layer(b3_5_conc, k) reshape_layer = Reshape([100, 120])(transion_3) # dense1 = Dense(units=256 , use_bias=True , kernel_initializer='he_normal')(reshape_layer) # dense1 = BatchNormalization()(dense1) # dense1 = Activation(activation='relu')(dense1) # dense1 = Dropout(rate=0.1)(dense1) dense2 = Dense(units=1024, use_bias=True, kernel_initializer='he_normal')(reshape_layer) dense2 = BatchNormalization()(dense2) dense2 = Activation(activation='relu')(dense2) dense2 = Dropout(rate=0.2)(dense2) dense3 = Dense(units=self.MS_OUTPUT_SIZE, use_bias=True)(dense2) y_pred = Activation(activation='softmax')(dense3) model_data = Model(inputs=input_data, outputs=y_pred) model_data.summary() plot_model(model_data, '/home/zhangwei/01.png', show_shapes=True) labels = Input(shape=[self.label_max_string_length], name='labels', dtype='float32') input_length = Input(shape=[1], name='input_length', dtype='int64') label_length = Input(shape=[1], name='label_length', dtype='int64') loss_out = Lambda(self.ctc_lambda_func, output_shape=[ 1, ], name='ctc')([y_pred, labels, input_length, label_length]) model = Model(inputs=[input_data, labels, input_length, label_length], outputs=loss_out) sgd = SGD(lr=0.00001, decay=1e-6, momentum=0.9, nesterov=True, clipnorm=5) ada_d = Adadelta(lr=0.0005, rho=0.95, epsilon=1e-6) adam = Adam(lr=0.001, epsilon=1e-6, decay=10e-3) model.compile(optimizer=adam, loss={ 'ctc': lambda y_true, y_pred: y_pred }) print( '==========================模型创建成功=================================' ) return model, model_data
model = Model(inputs=input_tensor, outputs=output)
'''
if 'urp_captcha_model.h5' in os.listdir("output"):
    print("load model")
    model = load_model("output/urp_captcha_model.h5", custom_objects={'my_metrics': my_metrics})
else:
    print("build model")
    model = Model(inputs=input_tensor, outputs=output)
'''
opt = Adadelta(lr=0.1)
# metrics are similar to the loss function, except that metric results are not used during training
model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy', my_metrics])

from keras.utils.vis_utils import plot_model
MODEL_VIS_FILE = 'output/captcha_classfication.png'
# model visualization
plot_model(model, to_file=MODEL_VIS_FILE, show_shapes=True)

for i in range(10000):
    '''
    if i % 10 == 0:
        # crawl 5000 images
        get_dataset.get_jpg(5000)
        # generate 'captcha_train_data.pkl'
    '''
# Finally we compile the model, specifying an optimizer, the loss function, and optionally
# metrics to be monitored during training.
#lambd = 0.002
add = 0
for optm in ["Nadam", "Adamax", "Adadelta"]:
    for lambd in [0.001, 0.0015, 0.0013, 0.0008]:  #, 0.05, 0.1, 0.5, 1.0, 3.0]:
        optmizer = None
        if (optm == "Nadam"):
            optmizer = Nadam(lr=lambd)
        elif (optm == "Adamax"):
            optmizer = Adamax(lr=lambd)
        elif (optm == "Adadelta" and add == 0):
            optmizer = Adadelta()
            lambd = 1.0
            add = 1
        else:
            continue

        print("Executing " + optm + " Optimizer For Learning Rate: " + str(lambd))
        classifier = create_baseline_model(lambd, optmizer)

        # To train the network we pass the training set and specify the mini-batch size,
        # the maximum number of epochs, and optionally callbacks. In the following example we use
        # early stopping to interrupt training if performance does not improve on a validation set.
        #callbacks=[EarlyStopping(patience=3)]
        history = classifier.fit(X_train, y_train, batch_size=64,
def compile(self):
    optimizer = Adadelta(lr=0.01, clipnorm=3.0, decay=1e-5)
    self._model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['acc'])
if __name__ == '__main__':
    batch_size = 32
    nb_epochs = 10
    train_dir = dir + '/Dataset/faces/Train/pickleStorage'
    test_dir = dir + '/Dataset/faces/Test/pickleStorage'
    model_dir = dir + '/Model/generalModel.json'

    train_gen = DeepFakeSequence.TrainSequence(train_dir, batch_size)
    test_gen = DeepFakeSequence.TestSequence(test_dir, batch_size)

    for i in range(10):
        classifier = FakeDetector.ConvNet()
        classifier.summary()
        learning_rate = 0.1
        opt = Adadelta(lr=learning_rate)
        early_stopper = EarlyStopping(monitor='acc', min_delta=0.01, patience=20)
        classifier.load_weights(dir + '/AutoEncModel/autoEnc_weights.h5', by_name=True)
        classifier.compile(optimizer=opt, loss='binary_crossentropy', metrics=['accuracy'])
        start = time.clock()
        print(i, '\n')
        classifier.fit_generator(generator=train_gen, epochs=200, verbose=2,
                                 validation_data=test_gen, shuffle=True,
def fit_model(data, depth, growth_rate, nb_dense_block, nb_filter, dropout, lr, epochs, opt,
              reduction, bn, batch_size, fc_dropout, fc_filter, fc_layers):
    input_shape = (1, 96, 96)
    es_patience = 4
    lr_patience = 3
    #batch_size = 64
    weight_file = 'keras_densenet_siamese_8Nov_2300_weights.h5'
    file_name = 'keras_densenet_siamese_8Nov_2300'
    dense_dropout = 0.5
    print("Epochs ", epochs, " batch_size: ", batch_size, " lr: ", lr, " optimizer: ", opt)
    print(" es_patience: ", es_patience, " lr_patience: ", lr_patience)
    print(" batch_size: ", batch_size, " fc_dropout: ", fc_dropout, " fc_filter: ", fc_filter, " fc_layers: ", fc_layers)

    base_network = create_base_network(depth, growth_rate, nb_dense_block, nb_filter, dropout, reduction, bn)

    input_a = Input(shape=input_shape)
    input_b = Input(shape=input_shape)
    processed_a = base_network(input_a)
    processed_b = base_network(input_b)

    combined_features = concatenate([processed_a, processed_b], name='merge_features')
    combined_features = Dense(fc_filter, kernel_initializer=keras.initializers.he_normal())(combined_features)
    combined_features = Activation('relu')(combined_features)
    combined_features = BatchNormalization()(combined_features)
    combined_features = Dropout(fc_dropout)(combined_features)
    combined_features = Dense(1, activation='sigmoid')(combined_features)

    model = Model(inputs=[input_a, input_b], outputs=[combined_features], name='model')
    model.summary()

    if opt == 'adam':
        optimizer = Adam(lr=lr)  # Using Adam instead of SGD to speed up training
    elif opt == 'nadam':
        optimizer = Nadam(lr=lr)
    elif opt == 'adadelta':
        optimizer = Adadelta(lr=lr)
    elif opt == 'adamax':
        optimizer = Adamax(lr=lr)
    elif opt == 'rmsprop':
        optimizer = RMSprop(lr=lr)
    else:
        optimizer = SGD(lr=lr, momentum=0.9)

    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['acc'])
    print('Finished compiling')
    #model.compile(loss=binary_crossentropy, optimizer=opt, metrics=['accuracy'])

    es = EarlyStopping(monitor='val_acc', patience=es_patience, verbose=1)
    checkpointer = ModelCheckpoint(filepath=weight_file, verbose=2, save_best_only=True)
    lr_reducer = ReduceLROnPlateau(monitor='val_loss', factor=np.sqrt(0.1), cooldown=0,
                                   patience=lr_patience, min_lr=0.5e-6, verbose=1)

    model.fit([data[0], data[1]], data[2], batch_size=batch_size, epochs=epochs,
              validation_data=([data[3], data[4]], data[5]),
              callbacks=[es, lr_reducer], verbose=2)

    #model = load_model(weight_file)  # This is the best model
    score, acc = model.evaluate([data[6], data[7]], data[8], verbose=0)
    print("Test accuracy:%0.3f" % acc)
    pred = model.predict([data[6], data[7]])
    auc_score = roc_auc_score(data[8], pred)
    auc_score = np.round(auc_score, 4)
    print("current auc_score ------------------> %0.3f" % auc_score)

    if (auc_score > .94):
        model_json = model.to_json()
        score = str(auc_score)
        with open(file_name + score + ".json", "w") as json_file:
            json_file.write(model_json)
        model.save_weights(file_name + score + ".h5")
        print("Saved model to disk")

    del model
    K.clear_session()
    return acc, auc_score
def train_eval(esargs):
    """ train and eval the model """
    global trainloader
    global testloader
    global net
    global best_acc
    global rank
    best_acc = 0
    lr_explore = esargs['learning_rate']
    bs_explore = int(esargs['batch_size'])

    if args.optimizer == "SGD":
        optimizer = SGD(lr=lr_explore, momentum=0, decay=args.weight_decay)
    elif args.optimizer == "Adadelta":
        optimizer = Adadelta(lr=lr_explore, decay=args.weight_decay)
    elif args.optimizer == "Adagrad":
        optimizer = Adagrad(lr=lr_explore, decay=args.weight_decay)
    elif args.optimizer == "Adam":
        optimizer = Adam(lr=lr_explore, decay=args.weight_decay)
    elif args.optimizer == "Adamax":
        optimizer = Adamax(lr=lr_explore, decay=args.weight_decay)
    elif args.optimizer == "RMSprop":
        optimizer = RMSprop(lr=lr_explore, decay=args.weight_decay)
    else:
        logger.debug("Input A Wrong optimizer")

    # Compile the model
    net.compile(loss="categorical_crossentropy", optimizer=optimizer, metrics=["accuracy"])

    (x_train, y_train) = trainloader
    (x_test, y_test) = testloader

    # train procedure
    # use a callback to record per-epoch information
    trial_id = nni.get_trial_id()
    f11 = open("/root/keras_trace" + str(rank), "a+")
    f11.write("rank-" + str(rank) + str(trial_id) + "\n")
    f11.close()

    available_devices = os.environ["CUDA_VISIBLE_DEVICES"]
    gpus = len(available_devices.split(","))  # may need to print this to check the GPU count

    history = net.fit(
        x=x_train,
        y=y_train,
        batch_size=bs_explore * gpus,
        validation_data=(x_test, y_test),
        epochs=args.epochs,
        shuffle=True,
        callbacks=[
            SendMetrics(),
            Epoch_num_record(experiment_path, trial_id),
            EarlyStopping(min_delta=0.001, patience=10),
            TensorBoard(log_dir=TENSORBOARD_DIR),
        ],
    )

    # trial report final acc to tuner
    if rank == 0:
        _, acc = net.evaluate(x_test, y_test)
        # record the best acc produced during the hyper-parameter search
        f11 = open("/root/log", "a+")
        f11.write("######acc:" + str(acc) + "\n")
        f11.close()
        if acc > best_acc:
            best_acc = acc
        logger.debug("Final result is: %.3f", acc)
        list = [best_acc, bs_explore, str(lr_explore)[0:7]]
        reslist.append(list)
        acclist.append(best_acc)
    return best_acc, history.epoch[-1]
model = Sequential()
model.add(Dense(164, kernel_initializer='lecun_uniform', input_shape=((dataCount * dataDepth), )))
model.add(Activation('relu'))

# Hidden layer
model.add(Dense(150, kernel_initializer='lecun_uniform'))
model.add(Activation('relu'))

# Output layer, use linear so they're real-world values
model.add(Dense(6, kernel_initializer='lecun_uniform'))
model.add(Activation('linear'))

rms = RMSprop()
opt = Adadelta()  # Next try
model.compile(loss='binary_crossentropy', optimizer=opt)


# Functions
# Calculates the time difference in milliseconds:
# regex on each filepath to get the time variables,
# create an object to get the epoch time;
# the difference in epoch time is the dt (in seconds)
def calculateDt(prev, current):
    # Calculate the epoch of the prev tick
    # File's last value is milliseconds, so convert to microseconds in the datetime constructor
    p = re.search(r'.+center_(\d+)_(\d+)_(\d+)_(\d+)_(\d+)_(\d+)_(\d+).jpg', prev)
def train_model_batch(model, config, test, resume=None): """ Trains the model using Keras train batch method :param resume: :param model: :param config: :param test: :return: """ if config['optimizer']['method'] == 'adagrad': optimizer = Adagrad() elif config['optimizer']['method'] == 'adadelta': optimizer = Adadelta() elif config['optimizer']['method'] == 'adam': optimizer = Adam() else: # default SGD params = config['optimizer']['params'] if resume is None: # New experiment optimizer = SGD(lr=params['lrate'], momentum=params['momentum'], decay=params['decay'], nesterov=params['nesterov']) iepoch = 0 else: # Resume training nlrate = params['lrate'] - ( (params['lrate'] / config['train']['epochs']) * params['epochs_trained']) optimizer = SGD(lr=nlrate, momentum=params['momentum'], decay=params['decay'], nesterov=params['nesterov']) iepoch = config['train']['epochs_trained'] model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy']) classweight = detransweights(config['train']['classweight']) if 'log' not in config or config['log'] == 'db': dblog = DBLog(database=mongoconnection, config=config, model=model, modelj=model.to_json(), resume=resume) else: dblog = FileLog(config=config, modelj=model.to_json()) recode = None if 'recode' not in config else recoding_dictionary( config['recode']) train = Dataset(config['datapath'], config['traindata'], config['zfactor'], imgord=config['imgord'], nclasses=test.nclasses, recode=recode) # Train Epochs logs = {'loss': 0.0, 'acc': 0.0, 'val_loss': 0.0, 'val_acc': 0.0} train.open() chunks, _ = train.chunks_list() for epoch in range(iepoch, config['train']['epochs']): shuffle(chunks) # Train Batches lloss = [] lacc = [] for chunk in chunks: train.load_chunk(chunk, config['train']['batchsize']) for p in train.perm: loss, acc = model.train_on_batch(train.X_train[p], train.y_train[p], class_weight=classweight) lloss.append(loss) lacc.append(acc) logs['loss'] = float(np.mean(lloss)) logs['acc'] = float(np.mean(lacc)) logs['val_loss'], logs['val_acc'] = model.evaluate(test.X_train, test.y_train, verbose=0) force_stop = dblog.force_stop() dblog.on_epoch_end(epoch, logs=logs) if config['savepath']: model.save(config['savepath'] + '/' + str(dblog.id) + '.h5') # If the training is stopped remotely training stops if force_stop: break train.close() scores = model.evaluate(test.X_train, test.y_train, verbose=0) dblog.on_train_end(logs={'acc': logs['acc'], 'val_acc': scores[1]}) y_pred = model.predict_classes(test.X_train, verbose=0) dblog.save_final_results(scores, confusion_matrix(test.y_labels, y_pred), classification_report(test.y_labels, y_pred))
def CreateModel(self):
    '''
    Define the CNN/LSTM/CTC model using the Keras functional API.
    Input layer: a sequence of 39-dimensional feature vectors; the maximum length of one utterance is set to 1500 (about 15 s)
    Hidden layer 1: convolutional layer with 1024 neurons
    Hidden layer 2: pooling layer, pool size 2
    Hidden layer 3: Dropout layer with a drop fraction of 0.2, to prevent overfitting
    Hidden layer 4: recurrent layer (LSTM)
    Hidden layer 5: Dropout layer with a drop fraction of 0.2, to prevent overfitting
    Hidden layer 6: fully connected layer with self.MS_OUTPUT_SIZE neurons, using softmax as the activation
    Output layer: a custom CTC layer that uses the CTC loss as the loss function, for connectionist temporal multi-output
    '''
    # Each frame is represented by 13 MFCC features plus their 13 first-order and 13 second-order deltas;
    # the maximum signal sequence length is 1500
    input_data = Input(name='the_input', shape=(self.AUDIO_LENGTH, self.AUDIO_FEATURE_LENGTH, 1))

    layer_h1 = Conv2D(32, (3, 3), use_bias=True, activation='relu', padding='same',
                      kernel_initializer='he_normal')(input_data)  # convolutional layer
    layer_h1 = Dropout(0.5)(layer_h1)
    layer_h2 = Conv2D(32, (3, 3), use_bias=True, activation='relu', padding='same',
                      kernel_initializer='he_normal')(layer_h1)  # convolutional layer
    layer_h3 = MaxPooling2D(pool_size=2, strides=None, padding="valid")(layer_h2)  # pooling layer
    #layer_h3 = Dropout(0.2)(layer_h2)  # randomly drop some connections to prevent overfitting
    layer_h4 = Conv2D(64, (3, 3), use_bias=True, activation='relu', padding='same',
                      kernel_initializer='he_normal')(layer_h3)  # convolutional layer
    layer_h4 = Dropout(0.5)(layer_h4)
    layer_h5 = Conv2D(64, (3, 3), use_bias=True, activation='relu', padding='same',
                      kernel_initializer='he_normal')(layer_h4)  # convolutional layer
    layer_h6 = MaxPooling2D(pool_size=2, strides=None, padding="valid")(layer_h5)  # pooling layer
    #test = Model(inputs=input_data, outputs=layer_h6)
    #test.summary()

    layer_h7 = Reshape((400, 3200))(layer_h6)  # Reshape layer
    #layer_h5 = LSTM(256, activation='relu', use_bias=True, return_sequences=True)(layer_h4)  # LSTM layer
    layer_h7 = Dropout(0.5)(layer_h7)  # randomly drop some connections to prevent overfitting
    layer_h8 = Dense(256, activation="relu", use_bias=True, kernel_initializer='he_normal')(layer_h7)  # fully connected layer
    layer_h8 = Dropout(0.5)(layer_h8)  # randomly drop some connections to prevent overfitting
    layer_h9 = Dense(self.MS_OUTPUT_SIZE, use_bias=True, kernel_initializer='he_normal')(layer_h8)  # fully connected layer
    y_pred = Activation('softmax', name='Activation0')(layer_h9)

    model_data = Model(inputs=input_data, outputs=y_pred)
    #model_data.summary()

    #labels = Input(name='the_labels', shape=[60], dtype='float32')
    labels = Input(name='the_labels', shape=[self.label_max_string_length], dtype='float32')
    input_length = Input(name='input_length', shape=[1], dtype='int64')
    label_length = Input(name='label_length', shape=[1], dtype='int64')

    # Keras doesn't currently support loss funcs with extra parameters
    # so CTC loss is implemented in a lambda layer
    #layer_out = Lambda(ctc_lambda_func, output_shape=(self.MS_OUTPUT_SIZE, ), name='ctc')([y_pred, labels, input_length, label_length])  #(layer_h6)  # CTC
    loss_out = Lambda(self.ctc_lambda_func, output_shape=(1, ),
                      name='ctc')([y_pred, labels, input_length, label_length])

    model = Model(inputs=[input_data, labels, input_length, label_length], outputs=loss_out)
    model.summary()

    # clipnorm seems to speed up convergence
    #sgd = SGD(lr=0.0001, decay=1e-6, momentum=0.9, nesterov=True, clipnorm=5)
    ada_d = Adadelta(lr=0.01, rho=0.95, epsilon=1e-06)
    #model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=sgd)
    model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=ada_d)

    # captures output of softmax so we can decode the output during visualization
    test_func = K.function([input_data], [y_pred])

    print('[*Info] Model created and compiled successfully')
    return model, model_data
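# CreateModel above wires the CTC loss through self.ctc_lambda_func without showing it. A minimal
# sketch of the conventional Keras CTC lambda (an assumption about this class, not code from the
# source) simply forwards the four tensors to the backend's batched CTC cost:
def ctc_lambda_func(self, args):
    y_pred, labels, input_length, label_length = args
    # K.ctc_batch_cost expects (y_true, y_pred, input_length, label_length)
    return K.ctc_batch_cost(labels, y_pred, input_length, label_length)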
def pass_arg(Xx, nsim, tr_size): print("Tr_size:", tr_size) def fix_seeds(seed): random.seed(seed) np.random.seed(seed) tf.random.set_seed(seed) session_conf = tf.compat.v1.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1) sess = tf.compat.v1.Session(graph=tf.compat.v1.get_default_graph(), config=session_conf) # K.set_session(sess) tf.compat.v1.keras.backend.set_session(sess) ss = 1 fix_seeds(ss) # MC dropout class MCDropout(Dropout): def call(self, inputs, training=None): return super(MCDropout, self).call(inputs, training=True) # Compute the RMSE given the ground truth (y_true) and the predictions(y_pred) def root_mean_squared_error(y_true, y_pred): return K.sqrt(K.mean(K.square(y_pred - y_true), axis=-1)) # # Making sure dimensionless bond length is less than 1 # def bond(bl): # return bl-1.0 # Making sure dimensionless bond length is less than 1 def bond(bl): bln = -bl * (bl < 0) blp = bl * (bl >= 1.0) - 1 * (bl >= 1.0) return bln + blp # # Making sure final porosity is less than initial # def poros(poroi, porof): # # porof[porof < 0] = 1-porof[porof < 0] # porof[porof < 0] = poroi[0]-porof[porof < 0] # print(porof) # return porof-poroi # Making sure final porosity is less than initial def poros(poroi, porof): porofn = -porof * (porof < 0) porofp = porof * (porof >= poroi) - poroi * (porof >= poroi) return porofp + porofn # def strength(bl, porof, nlayer=6): # discp = [] # sigma01, sigma02 = 6, 31 # C1s = 21 # sigma_long = sigma01*(np.exp((1.0-porof)**(C1s*nlayer))-porof) + sigma02*(1.0-porof) # # print("sigma_long:",sigma_long) # for i in range(len(sigma_long)): # for j in range(i + 1, len(sigma_long)): # if (sigma_long[j] > sigma_long[i]): # discp.append(bl[i] - bl[j]) # discp = np.array(discp) # print(discp) # return discp def strength1(bl, porof, nlayer=6): sigma01, sigma02 = 6, 31 C1s = 21 sigma_long = sigma01 * (np.exp( (1.0 - porof)**(C1s * nlayer)) - porof) + sigma02 * (1.0 - porof) sigma_long_sorted = np.sort(sigma_long, axis=-1) # sorts along first axis (down) ind = np.argsort(sigma_long, axis=-1) # sorts along first axis (down) bl_sorted = np.take_along_axis(bl, ind, axis=-1) # same as np.sort(x, axis=0) corr_bl_sorted = np.sort(bl, axis=-1) # sorts along first axis (down) return corr_bl_sorted - bl_sorted def strength2(bl, porof, nlayer=6): sigma01, sigma02 = 6, 31 C1s = 21 sigma_long = sigma01 * (np.exp( (1.0 - porof)**(C1s * nlayer)) - porof) + sigma02 * (1.0 - porof) sigma_long_sorted = np.sort(sigma_long, axis=-1) # sorts along first axis (down) ind = np.argsort(sigma_long, axis=-1) # sorts along first axis (down) bl_sorted = np.take_along_axis(bl, ind, axis=-1) # same as np.sort(x, axis=0) return sum(bl_sorted[1:] - bl_sorted[:-1] < 0) / 14 def phy_loss_mean(params): # useful for cross-checking training loss1, loss2, loss3, loss4, lam1, lam2 = params x1, x2, x3 = loss1 * (loss1 > 0), loss2 * (loss2 > 0), loss3 * (loss3 > 0) # print(np.mean(x1), x1.shape[0]) # print(np.mean(x2), x2.shape[0]) # print(np.mean(x3), x3.shape[0]) if x1.any() and x1.shape[0] > 1: X_scaled1 = (x1 - np.min(x1)) / (np.max(x1) - np.min(x1)) x1 = X_scaled1 if x2.any() and x2.shape[0] > 1: X_scaled2 = (x2 - np.min(x2)) / (np.max(x2) - np.min(x2)) x2 = X_scaled2 if x3.any() and x3.shape[0] > 1: X_scaled3 = (x3 - np.min(x3)) / (np.max(x3) - np.min(x3)) x3 = X_scaled3 return (lam1 * np.mean(x1) + lam2 * np.mean(x2) + lam2 * np.mean(x3)) # return (lam1*np.mean(x1) + lam2*np.mean(x2) + lam2*np.mean(x3) + lam2*loss4) # def phy_loss_mean(params): # # useful for cross-checking 
training # diff1, diff2, lam1, lam2 = params # x1, x2 = diff1*(diff1>0), diff2*(diff2>0) # if np.any(x1): # X_scaled1 = (x1 - np.min(x1)) / (np.max(x1) - np.min(x1)) # x1 = X_scaled1 # if np.any(x2): # X_scaled2 = (x2 - np.min(x2)) / (np.max(x2) - np.min(x2)) # x2 = X_scaled2 # return lam1*np.mean(x1) + lam2*np.mean(x2) def PGNN_train_test(optimizer_name, optimizer_val, drop_rate, iteration, n_layers, n_nodes, tr_size, lamda, reg): # Hyper-parameters of the training process # batch_size = int(tr_size/2) batch_size = 1 num_epochs = 300 val_frac = 0.25 patience_val = 80 # Initializing results filename exp_name = optimizer_name + '_drop' + str(drop_rate) + '_nL' + str( n_layers) + '_nN' + str(n_nodes) + '_trsize' + str( tr_size) + '_iter' + str(iteration) exp_name = exp_name.replace('.', 'pt') results_dir = '../results/' model_name = results_dir + exp_name + '_NoPhyInfomodel.h5' # storing the trained model if reg: results_name = results_dir + exp_name + '_results_regularizer.dat' # storing the results of the model else: results_name = results_dir + exp_name + '_results.dat' # storing the results of the model # Load labeled data data = np.loadtxt('../data/labeled_data.dat') # data = np.loadtxt('../data/labeled_data_BK_constw_unique.dat') # data = np.loadtxt('../data/labeled_data_BK_constw_v2.dat') # x_labeled = data[:, :-5] # -2 because we do not need porosity predictions x_labeled = data[:, : 2] # -2 because we do not need porosity predictions y_labeled = data[:, -3:-1] # normalize dataset with MinMaxScaler scaler = preprocessing.MinMaxScaler(feature_range=(0, 1.0)) # scaler = preprocessing.StandardScaler() x_labeled = scaler.fit_transform(x_labeled) # y_labeled = scaler.fit_transform(y_labeled) # train and test data trainX, trainY = x_labeled[:tr_size, :], y_labeled[:tr_size] # testX, testY = x_labeled[tr_size:,:], y_labeled[tr_size:] # init_poro = data[tr_size:, -1] testX, testY = x_labeled[tr_size:, :], y_labeled[tr_size:] init_poro = data[tr_size:, -1] # Creating the model model = Sequential() for layer in np.arange(n_layers): if layer == 0: model.add( Dense(n_nodes, activation='relu', input_shape=(np.shape(trainX)[1], ))) else: if reg: model.add( Dense(n_nodes, activation='relu', kernel_regularizer=l1_l2(l1=.001, l2=.001))) else: model.add(Dense(n_nodes, activation='relu')) model.add(MCDropout(rate=drop_rate)) model.add(Dense(2, activation='linear')) model.compile(loss='mean_squared_error', optimizer=optimizer_val, metrics=[root_mean_squared_error]) early_stopping = EarlyStopping(monitor='val_loss', patience=patience_val, verbose=1) print('Running...' 
+ optimizer_name) history = model.fit(trainX, trainY, batch_size=batch_size, epochs=num_epochs, verbose=0, validation_split=val_frac, callbacks=[early_stopping, TerminateOnNaN()]) test_score = model.evaluate(testX, testY, verbose=1) print(test_score) # predictions = model.predict(testX) # # inv_pred = scaler.inverse_transform(predictions) # phyloss1 = bond(predictions[:,0]) # physics loss 1 # # init_poro_ndim = np.ones((init_poro.shape)) # # diff2 = poros(init_poro_ndim, predictions[:,1]) # physics loss 2 # phyloss2 = poros(init_poro, predictions[:,1]) # physics loss 2 # phyloss3 = strength1(predictions[:,0], predictions[:,1]) # phyloss4 = strength2(predictions[:,0], predictions[:,1]) # lam1, lam2 = lamda[0], lamda[1] # phyloss = phy_loss_mean([phyloss1, phyloss2, phyloss3, phyloss4, lam1, lam2]) # print('iter: ' + str(iteration) + # ' nL: ' + str(n_layers) + ' nN: ' + str(n_nodes) + # ' trsize: ' + str(tr_size) + # ' TestRMSE: ' + str(test_score[1]) + ' PhyLoss: ' + str(phyloss), "\n") # # model.save(model_name) # # save results # results = {'train_rmse':history.history['root_mean_squared_error'], # 'val_rmse':history.history['val_root_mean_squared_error'], # 'test_rmse':test_score[1], 'PhyLoss':phyloss} # save_obj(results, results_name) # return results, results_name, predictions, testY, test_score[1] # predictions = model.predict(Xx) samples = [] for i in range(int(nsim)): print("simulation num:", i) predictions = model.predict(Xx) predictions = predictions[:, 1] samples.append(predictions) return np.array(samples) # Main Function if __name__ == '__main__': # fix_seeds(1) # List of optimizers to choose from optimizer_names = [ 'Adagrad', 'Adadelta', 'Adam', 'Nadam', 'RMSprop', 'SGD', 'NSGD' ] optimizer_vals = [ Adagrad(clipnorm=1), Adadelta(clipnorm=1), Adam(clipnorm=1), Nadam(clipnorm=1), RMSprop(clipnorm=1), SGD(clipnorm=1.), SGD(clipnorm=1, nesterov=True) ] # selecting the optimizer optimizer_num = 1 optimizer_name = optimizer_names[optimizer_num] optimizer_val = optimizer_vals[optimizer_num] # Selecting Other Hyper-parameters drop_rate = 0.01 # Fraction of nodes to be dropped out n_layers = 2 # Number of hidden layers n_nodes = 5 # Number of nodes per hidden layer # # Iterating over different training fractions and splitting indices for train-test splits # trsize_range = [4,6,8,10,20] # #default training size = 5000 # tr_size = trsize_range[4] tr_size = int(tr_size) # use regularizer reg = True #set lamda=0 for pgnn0 lamda = [1, 1] # Physics-based regularization constant # total number of runs iter_range = np.arange(1) testrmse = [] # iterating through all possible params for iteration in iter_range: # results, result_file, pred, obs, rmse = PGNN_train_test(optimizer_name, optimizer_val, drop_rate, # iteration, n_layers, n_nodes, tr_size, lamda, reg) # testrmse.append(rmse) pred = PGNN_train_test(optimizer_name, optimizer_val, drop_rate, iteration, n_layers, n_nodes, tr_size, lamda, reg) return np.squeeze(pred)
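# Each pass_arg variant above returns nsim stochastic forward passes (MC dropout), stacked into an
# array of shape (nsim, n_points). A short sketch of how such samples are commonly reduced to a
# mean prediction and a predictive uncertainty (names here are illustrative, not from the source):
import numpy as np


def summarize_mc_samples(samples):
    # samples: (nsim, n_points) array of MC-dropout predictions
    mean_pred = samples.mean(axis=0)         # point estimate per input
    std_pred = samples.std(axis=0, ddof=1)   # predictive spread per input
    return mean_pred, std_pred

# e.g. mean_porosity, porosity_std = summarize_mc_samples(pass_arg(Xx, nsim=100, tr_size=20))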
def train(model): print("Model done") # x_train = [] # y_train = [] # # preprocessing_function : # function that will be implied on each input. The function will run after the image is # resized and augmented. The function should take one argument: one image (Numpy tensor # with rank 3), and should output a Numpy tensor with the same shape. # we create two instances with the same arguments data_gen_args = dict( preprocessing_function=random_crop, # rescale=1. / 255, # featurewise_center=True, # featurewise_std_normalization=True, horizontal_flip=True, vertical_flip=True, validation_split=0.1) x_image_gen = ImageDataGenerator(**data_gen_args) y_image_gen = ImageDataGenerator(**data_gen_args) print("Before Img Gen FIT") # Provide the same seed and keyword arguments to the fit and flow methods seed = 1 # compute quantities required for featurewise normalization (std, center) # x_image_gen.fit(x_train, augment=True, seed=seed) # TODO: x_train NEED to be 4 dimensional # y_image_gen.fit(y_train, augment=True, seed=seed) x_gen = x_image_gen.flow_from_directory( 'pictures/keras_test', target_size=(img_width // scale_fact, img_width // scale_fact), batch_size=1, class_mode=None, # TODO: could be "input" save_to_dir="pictures/keras_test/training/training", # save_prefix="t0_", subset="training", interpolation="lanczos", seed=seed) y_gen = y_image_gen.flow_from_directory( 'pictures/keras_test', target_size=(img_width, img_width), batch_size=1, class_mode=None, # TODO: was None save_to_dir="pictures/keras_test/training/validation", # save_prefix="t0_", subset="training", interpolation="lanczos", seed=seed) print("Before Zip") # combine generators into one which yields x and y together train_generator = itertools.zip_longest(x_gen, y_gen) optimizer = Adadelta(lr=1.0, rho=0.95, epsilon=None, decay=0.0) model.compile(optimizer=optimizer, loss='mean_squared_error') print("Before fit_generator") model.fit_generator( train_generator, verbose=2, steps_per_epoch= 12, # equal to (nbr samples of your dataset) // (batch size) epochs=6, callbacks=get_callbacks()) run_tests(model)
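# random_crop is passed as the preprocessing_function above but is not defined in this excerpt. A minimal sketch of
# such a function, following the contract described in the comments (takes one rank-3 NumPy image and returns an
# array of the same shape); the crop fraction and the use of skimage are assumptions, not the original implementation:
import numpy as np
from skimage.transform import resize

def random_crop(img, crop_frac=0.8):
    h, w = img.shape[0], img.shape[1]
    ch, cw = max(1, int(h * crop_frac)), max(1, int(w * crop_frac))
    top = np.random.randint(0, h - ch + 1)
    left = np.random.randint(0, w - cw + 1)
    crop = img[top:top + ch, left:left + cw, :]
    return resize(crop, (h, w), preserve_range=True)  # resize back so the output shape matches the input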
from keras.callbacks import ModelCheckpoint, EarlyStopping from keras.callbacks import TerminateOnNaN from nnf.io_utils import generate_tag, SettingsParser from nnf.custom_keras_utils import spread, mean_pred, LossTracking, rmse_loss, \ custom_sigmoid from nnf.batch_preprocess import PartitionProcessor from nnf.io_utils import store_nn_paras activation_list = [ custom_sigmoid, 'softplus', 'relu', 'tanh', 'sigmoid', 'softplus' ] optimizer_list = [ SGD(lr=0.001, decay=0.001, momentum=0.9, nesterov=True), Adam(), Nadam(), Adadelta(), Adam(clipnorm=1.0), Nadam(clipnorm=1.0), Adadelta(clipnorm=1.0), 'rmsprop', 'adagrad', 'adamax' ] loss_list = [ rmse_loss, 'mean_squared_error', 'mean_absolute_error', 'mean_absolute_percentage_error' ] # arbitrary fixed seed for reproducibility np.random.seed(8) os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' def print_progress(line): """
model.add(Flatten()) model.add(Dense(1024)) model.add( keras.layers.advanced_activations.PReLU(alpha_initializer='zero', weights=None)) model.add(Dropout(0.2)) model.add(Dense(1024)) model.add( keras.layers.advanced_activations.PReLU(alpha_initializer='zero', weights=None)) model.add(Dropout(0.2)) model.add(Dense(7, activation='softmax')) ada = Adadelta(lr=0.1, rho=0.95, epsilon=1e-08) model.compile(loss='categorical_crossentropy', optimizer=ada, metrics=['accuracy']) model.summary() y_ = np_utils.to_categorical(y) Y_train = y_[:train_rows] Y_crossval = y_[train_rows:test_rows] print(X_crossval.shape, model.input_shape, Y_crossval.shape) datagen = ImageDataGenerator( featurewise_center=False, # set input mean to 0 over the dataset samplewise_center=False, # set each sample mean to 0 featurewise_std_normalization= False, # divide inputs by std of the dataset
internal_embedding_size=96, cluster_count_dense_layers=1, cluster_count_dense_units=256, output_dense_layers=1, output_dense_units=256, cluster_count_lstm_layers=1, cluster_count_lstm_units=128, kl_embedding_size=128, kl_divergence_factor=0.1) c_nn.include_self_comparison = False c_nn.weighted_classes = True c_nn.class_weights_approximation = 'stochastic' c_nn.minibatch_size = 15 c_nn.class_weights_post_processing_f = lambda x: np.sqrt(x) c_nn.set_loss_weight('similarities_output', 5.0) c_nn.optimizer = Adadelta(lr=5.0) validation_factor = 10 c_nn.early_stopping_iterations = 10001 c_nn.validate_every_nth_epoch = 10 * validation_factor c_nn.validation_data_count = c_nn.minibatch_size * validation_factor # c_nn.prepend_base_name_to_layer_name = False print_loss_plot_every_nth_itr = 100 # c_nn.f_cluster_count = lambda: 10 # c_nn.minibatch_size = 200 # c_nn._get_keras_loss() # i = 0 # start = time()
m = Dropout(0.5)(m) m = Dense(512, activation='elu')(m) m = Dropout(0.5)(m) o = Dense(out_dim, activation='softmax')(m) model = Model(inputs=i, outputs=o) model.summary() data_augmentation = False # causes MemoryError if not data_augmentation: model.compile(loss='categorical_crossentropy', optimizer=Nadam(lr=1e-3), metrics=['accuracy']) model.fit(x_train, y_train, epochs=4, verbose=1, validation_data=(x_val, y_val)) model.compile(loss='categorical_crossentropy', optimizer=Nadam(lr=1e-4), metrics=['accuracy']) model.fit(x_train, y_train, epochs=4, verbose=1, validation_data=(x_val, y_val)) model.compile(loss='categorical_crossentropy', optimizer=Adadelta(lr=1e-4), metrics=['accuracy']) model.fit(x_train, y_train, epochs=4, verbose=1, validation_data=(x_val, y_val)) else: datagen = ImageDataGenerator( featurewise_center=False, # set input mean to 0 over the dataset samplewise_center=False, # set each sample mean to 0 featurewise_std_normalization=False, # divide inputs by std of the dataset samplewise_std_normalization=False, # divide each input by its std zca_whitening=False, # apply ZCA whitening rotation_range=0, # randomly rotate images in the range (degrees, 0 to 180) width_shift_range=0.1, # randomly shift images horizontally (fraction of total width) height_shift_range=0.1, # randomly shift images vertically (fraction of total height) horizontal_flip=False, # randomly flip images vertical_flip=False) # randomly flip images datagen.fit(x_train)
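# The block above anneals the learning rate by recompiling with fresh optimizers, which also resets optimizer state.
# A sketch of an alternative (not the original author's code): keep one optimizer and decay its learning rate with a
# LearningRateScheduler callback; the decay schedule below is an assumption chosen to mimic the 1e-3 -> 1e-4 steps.
from keras.callbacks import LearningRateScheduler
from keras.optimizers import Nadam

def step_decay(epoch):
    return 1e-3 * (0.1 ** (epoch // 4))  # 1e-3 for epochs 0-3, 1e-4 for epochs 4-7, then 1e-5

model.compile(loss='categorical_crossentropy', optimizer=Nadam(lr=1e-3), metrics=['accuracy'])
model.fit(x_train, y_train, epochs=12, verbose=1,
          validation_data=(x_val, y_val),
          callbacks=[LearningRateScheduler(step_decay, verbose=1)])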
model.add(Convolution2D(12,3,3,init='uniform',border_mode='full',input_shape=(3,s,s))) model.add(Activation('tanh')) model.add(Convolution2D(12, 3, 3)) model.add(Activation('tanh')) model.add(MaxPooling2D(pool_size=(2, 2))) model.add(Dropout(0.5)) model.add(Convolution2D(24,3,3,border_mode='full')) model.add(Activation('tanh')) model.add(Convolution2D(24, 3, 3)) model.add(Activation('tanh')) model.add(MaxPooling2D(pool_size=(2, 2))) #model.add(Dropout(0.5)) #model.add(Convolution2D(48, 3, 3, border_mode='full')) #model.add(Activation('tanh')) #model.add(Convolution2D(48, 3, 3)) #model.add(Activation('tanh')) #model.add(MaxPooling2D(pool_size=(2, 2))) model.add(Flatten()) model.add(Dropout(0.5)) model.add(Dense(100)) model.add(Activation('tanh')) model.add(Dropout(0.5)) model.add(Dense(nb_classes)) model.add(Activation('softmax')) sgd = SGD(lr=0.000001, decay=1e-6, momentum=0.9, nesterov=True) RMS = RMSprop(lr=0.0000005, rho=0.7, epsilon=1e-08) Ada = Adadelta(lr=0.001, rho=0.95, epsilon=1e-06) model.compile(loss='categorical_crossentropy', optimizer = RMS) model.fit(train_data, train_labels, batch_size=8, nb_epoch=200,verbose=1,show_accuracy=True,validation_data=(test_data, test_labels))
def load_network(self, name, xtrain=[], ytrain=[], xtest=[], ytest=[], train=False): summary = self.summary if name == 'odin_v1': filters1 = 8 filters2 = 16 conv2 = True conv4 = False nodes_1 = 32 nodes_2 = 16 leaky = False epochs = 10 elif name == 'odin_v2': filters1 = 8 filters2 = 16 conv2 = True conv4 = False nodes_1 = 32 nodes_2 = 16 leaky = True epochs = 25 elif name == 'horus': filters1 = 16 filters2 = 16 conv2 = False conv4 = False nodes_1 = 32 nodes_2 = 16 leaky = False epochs = 20 elif name == 'providence_v2': filters1 = 8 filters2 = 16 conv2 = True conv4 = True nodes_1 = 256 nodes_2 = 128 leaky = False epochs = 10 else: filters1 = 8 filters2 = 16 conv2 = True conv4 = True nodes_1 = 2048 nodes_2 = 512 leaky = False epochs = 10 if train: # Input shape input_layer = Input(xtrain[0].shape) ## Convolutional layers 1 conv_layer1 = Conv3D(filters=filters1, kernel_size=(3, 3, 3), activation='relu')(input_layer) # Add 2nd convolution is needed if conv2 == True: if xtrain[0].shape[0] > 5: conv_layer2 = Conv3D(filters=filters2, kernel_size=(3, 3, 3), activation='relu')(conv_layer1) else: conv_layer2 = Conv3D(filters=filters2, kernel_size=(1, 3, 3), activation='relu')(conv_layer1) else: conv_layer2 = conv_layer1 # Max pooling to obtain the most imformatic features if xtrain[0].shape[0] > 5: pooling_layer1 = MaxPooling3D(pool_size=(2, 2, 2))(conv_layer2) else: pooling_layer1 = MaxPooling3D(pool_size=(1, 2, 2))(conv_layer2) ## Convolutional layers 2 if xtrain[0].shape[0] > 8: conv_layer3 = Conv3D(filters=32, kernel_size=(3, 3, 3), activation='relu')(pooling_layer1) else: conv_layer3 = Conv3D(filters=32, kernel_size=(1, 3, 3), activation='relu')(pooling_layer1) # Add 4th conv layer if needed if conv4 == True: # When using less frames, we need to reduce kernal size to fit after previous convolutions if xtrain[0].shape[0] > 11: conv_layer4 = Conv3D(filters=64, kernel_size=(3, 3, 3), activation='relu')(conv_layer3) else: conv_layer4 = Conv3D(filters=64, kernel_size=(1, 3, 3), activation='relu')(conv_layer3) else: conv_layer4 = conv_layer3 # Max pooling to obtain the most imformatic features # When using less frames, we need to reduce kernal size to fit after previous convolutions if xtrain[0].shape[0] > 14: pooling_layer2 = MaxPooling3D(pool_size=(2, 2, 2))(conv_layer4) else: pooling_layer2 = MaxPooling3D(pool_size=(1, 2, 2))(conv_layer4) # Normalize and flatten before feeding it to fully connected classification stage pooling_layer2 = BatchNormalization()(pooling_layer2) flatten_layer = Flatten()(pooling_layer2) # Add dropouts to avoid overfitting / perform regularization dense_layer1 = Dense(units=nodes_1, activation='relu')(flatten_layer) dense_layer2 = Dropout(0.4)(dense_layer1) if leaky: dense_layer3 = LeakyReLU(alpha=5)(dense_layer2) else: dense_layer3 = Dense(units=nodes_2, activation='relu')(dense_layer2) dense_layer4 = Dropout(0.4)(dense_layer3) output_layer = Dense(2, activation='softmax')(dense_layer4) # Define the model with input layer and output layer model = Model(inputs=input_layer, outputs=output_layer) if summary: print(model.summary()) model.compile(loss=categorical_crossentropy, optimizer=Adadelta(lr=0.1), metrics=['acc']) if len(xtest) > 0 and len(ytest) > 0: history = model.fit(x=xtrain, y=ytrain, batch_size=32, epochs=epochs, validation_data=(xtest, ytest), verbose=2) else: history = model.fit(x=xtrain, y=ytrain, batch_size=32, epochs=epochs, validation_split=0.2, verbose=2) # Save the model and history to disk filename = constants.SAVED_MODELS + name + '.sav' 
pickle.dump(model, open(filename, 'wb')) his_filename = constants.SAVED_MODELS + name + '_history.sav' pickle.dump(history, open(his_filename, 'wb')) else: providence_filepath = constants.SAVED_MODELS + name + '.sav' exists = os.path.isfile(providence_filepath) if exists: model = pickle.load( open(constants.SAVED_MODELS + name + '.sav', 'rb')) if summary: print(model.summary()) print('{} is ready.'.format(name.capitalize())) else: print('No saved model detected!') self.model = model
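# The snippet above persists the compiled model with pickle; depending on the Keras/TensorFlow version this can fail
# or drop optimizer state. A sketch of the built-in HDF5 serialization as an alternative (the .h5 path is assumed
# for illustration, not taken from the original code):
from keras.models import load_model

model.save(constants.SAVED_MODELS + name + '.h5')             # architecture + weights + optimizer state
restored = load_model(constants.SAVED_MODELS + name + '.h5')  # ready for evaluate/predict or further training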
def predictAllShop_ANN_part_together(all_data, trainAsTest=False, saveFilePath=None, featurePath=None, cate_level=0, cate_name=None, featureSavePath=None, needSaveFeature=False, time=1): """ Train on all data from all shops and predict every shop. :param trainAsTest: whether to use the last 14 days of the training set as the test set :param model: the model to use :param saveFilePath :param featurePath: :param cate_level: :param cate_name: :param featureSavePath: :param needSaveFeature: :param time: which run this is :return: """ ignores = 0 shopids = None shop_need_to_predict = 2000 if (cate_level == 0): shopids = range(1, 1 + shop_need_to_predict, 1) else: shopids = Parameter.extractShopValueByCate(cate_level, cate_name) shop_info = pd.read_csv(Parameter.shopinfopath, names=[ "shopid", "cityname", "locationid", "perpay", "score", "comment", "level", "cate1", "cate2", "cate3" ]) weekOrWeekend = True day_back_num = 21 sameday_backNum = 7 week_backnum = 3 other_features = [statistic_functon_mean, statistic_functon_median] other_features = [] '''one-hot encode cate1''' cate = shop_info['cate1'].tolist() cate_dup = set(cate) cates = [] for i in range(len(cate_dup)): cates.append([i]) hot_encoder = OneHotEncoder().fit(cates) dicts = dict(zip(cate_dup, range(len(cate_dup)))) cate_num = [] for c in cate: cate_num.append([dicts[c]]) '''cate1 one-hot encoding finished''' if featurePath is None: all_x = None all_y = None for shopid in shopids: if shopid in Parameter.ignore_shopids: print "ignore get train", shopid ignores += 1 continue print "get ", shopid, " train" part_data = all_data[all_data.shopid == shopid] last_14_real_y = None # take part of the data as the training set if trainAsTest: # if the last 14 days serve as the test set, the training set is the earlier part last_14_real_y = part_data[len(part_data) - 14:]["count"].values part_data = part_data[0:len(part_data) - 14] # print last_14_real_y skipNum = part_data.shape[0] - 128 if skipNum < 0: skipNum = 0 train_x = None if sameday_backNum != 0: sameday = extractBackSameday(part_data, sameday_backNum, skipNum, nan_method_sameday_mean) train_x = getOneWeekdayFomExtractedData(sameday) if day_back_num != 0: if train_x is not None: train_x = np.concatenate( (train_x, getOneWeekdayFomExtractedData( extractBackDay(part_data, day_back_num, skipNum, nan_method_sameday_mean))), axis=1) else: train_x = getOneWeekdayFomExtractedData( extractBackDay(part_data, day_back_num, skipNum, nan_method_sameday_mean)) if weekOrWeekend: ws = getOneWeekdayFomExtractedData( extractWorkOrWeekend(part_data, skipNum)) hot_encoder = onehot(ws) train_x = np.concatenate( (train_x, hot_encoder.transform(ws).toarray()), axis=1) count = extractCount(part_data, skipNum) train_y = getOneWeekdayFomExtractedData(count) for feature in other_features: value = getOneWeekdayFomExtractedData( extractBackWeekValue(part_data, week_backnum, skipNum, nan_method_sameday_mean, feature)) train_x = np.append(train_x, value, axis=1) # '''append shop info features''' # # print train_x,train_x.shape # index = shopid - 1 # oneshopinfo = shop_info.ix[index] # shop_perpay = oneshopinfo['perpay'] if not pd.isnull(oneshopinfo['perpay']) else 0 # shop_score = oneshopinfo['score'] if not pd.isnull(oneshopinfo['score']) else 0 # shop_comment = oneshopinfo['comment'] if not pd.isnull(oneshopinfo['comment']) else 0 # shop_level = oneshopinfo['level'] if not pd.isnull(oneshopinfo['level']) else 0 # shop_cate1 = oneshopinfo['cate1'] # import warnings # with warnings.catch_warnings(): # warnings.simplefilter("ignore",category=DeprecationWarning) # shop_cate1_encoder = hot_encoder.transform([dicts[shop_cate1]]).toarray() # train_x = np.insert(train_x,train_x.shape[1],shop_perpay,axis=1) # train_x =
np.insert(train_x,train_x.shape[1],shop_score,axis=1) # train_x = np.insert(train_x,train_x.shape[1],shop_comment,axis=1) # train_x = np.insert(train_x,train_x.shape[1],shop_level,axis=1) # for i in range(shop_cate1_encoder.shape[1]): # train_x = np.insert(train_x,train_x.shape[1],shop_cate1_encoder[0][i],axis=1) # '''shop info features appended''' if all_x is None: all_x = train_x all_y = train_y else: all_x = np.insert(all_x, all_x.shape[0], train_x, axis=0) all_y = np.insert(all_y, all_y.shape[0], train_y, axis=0) # '''append day-of-week feature''' # extract_weekday = getOneWeekdayFomExtractedData(extractWeekday(part_data, skipNum)) # train_x = np.append(train_x, extract_weekday, axis=1) # '''''' # train_x = train_x.reshape((train_x.shape[0], # train_x.shape[1], 1)) # print model.get_weights() # part_counts = [] # for i in range(7): # weekday = i + 1 # part_count = getOneWeekdayFomExtractedData(count, weekday) # part_counts.append(part_count) train_x = all_x train_y = all_y if needSaveFeature: featureAndLabel = np.concatenate((train_x, train_y), axis=1) flDF = pd.DataFrame( featureAndLabel, columns=[ "sameday1", "sameday2", "sameday3", "week_mean1", "week_mean2", "week_mean3", "week_median1", "week_median2", "week_median3", "perpay", "score", "comment", "level", "cate1_1", "cate1_2", "cate1_3", "cate1_4", "cate1_5", "cate1_6", "label" ]) if featureSavePath is None: if trainAsTest: featureSavePath = "train_feature/%df_%d_%s.csv" % ( flDF.shape[1] - 1, cate_level, cate_name) else: featureSavePath = "feature/%df_%d_%s.csv" % ( flDF.shape[1] - 1, cate_level, cate_name) flDF.to_csv(featureSavePath) else: # a featurePath file was supplied flDF = pd.read_csv(featurePath, index_col=0) train_x = flDF.values[:, :-1] train_y = flDF.values[:, -1:] # print train_x # print train_y '''normalize the features and labels''' x_scaler = MinMaxScaler().fit(train_x) y_scaler = MinMaxScaler().fit(train_y) train_x = x_scaler.transform(train_x) train_y = y_scaler.transform(train_y) '''normalization finished''' '''build the neural network''' h1_activation = "relu" rnn_epoch = 60 verbose = 0 h_unit = 16 batch_size = 5 np.random.seed(128) model = Sequential() model.add( Dense(h_unit, init="normal", input_dim=train_x.shape[1], activation=h1_activation)) #sigmoid model.add( Dense(1, init="normal", activation='linear', activity_regularizer=activity_l2(0.01))) sgd = SGD(0.005) # rmsprop = RMSprop(0.01) # adagrad = Adagrad(0.05) adadelta = Adadelta(0.01) adam = Adam(0.0001) adamax = Adamax(0.01) nadam = Nadam(0.01) model.compile(loss="mse", optimizer=adam) '''network built''' model.fit(train_x, train_y, nb_epoch=rnn_epoch, batch_size=batch_size, verbose=verbose) format = "%Y-%m-%d" if trainAsTest: startTime = datetime.datetime.strptime("2016-10-18", format) else: startTime = datetime.datetime.strptime("2016-11-1", format) timedelta = datetime.timedelta(1) '''predict all shops''' preficts_all = None real_all = None for j in shopids: if j in Parameter.ignore_shopids: print "ignore predict", j continue print "predict:", j preficts = [] part_data = all_data[all_data.shopid == j] last_14_real_y = None if trainAsTest: # if the last 14 days serve as the test set, the training set is the earlier part last_14_real_y = part_data[len(part_data) - 14:]["count"].values part_data = part_data[0:len(part_data) - 14] '''predict 14 days''' for i in range(14): currentTime = startTime + timedelta * i strftime = currentTime.strftime(format) # index = getWeekday(strftime) - 1 # part_count = part_counts[index] # use the same weekday from the previous {sameday_backNum} weeks as prediction features part_data = part_data.append( { "count": 0, "shopid": j, "time": strftime, "weekday": getWeekday(strftime) }, ignore_index=True) x = None if sameday_backNum != 0: x = getOneWeekdayFomExtractedData(
extractBackSameday(part_data, sameday_backNum, part_data.shape[0] - 1, nan_method_sameday_mean)) if day_back_num != 0: if x is None: x = getOneWeekdayFomExtractedData( extractBackDay(part_data, day_back_num, part_data.shape[0] - 1, nan_method_sameday_mean)) else: x = np.concatenate( (x, getOneWeekdayFomExtractedData( extractBackDay(part_data, day_back_num, part_data.shape[0] - 1, nan_method_sameday_mean))), axis=1) if weekOrWeekend: x = np.concatenate( (x, hot_encoder.transform( getOneWeekdayFomExtractedData( extractWorkOrWeekend( part_data, part_data.shape[0] - 1))).toarray()), axis=1) for feature in other_features: x_value = getOneWeekdayFomExtractedData( extractBackWeekValue(part_data, week_backnum, part_data.shape[0] - 1, nan_method_sameday_mean, feature)) x = np.append(x, x_value, axis=1) # '''append day-of-week feature''' # x = np.append(x, getOneWeekdayFomExtractedData(extractWeekday(part_data, part_data.shape[0]-1)), axis=1) # '''''' # '''append shop info features''' # index = j - 1 # oneshopinfo = shop_info.ix[index] # shop_perpay = oneshopinfo['perpay'] if not pd.isnull(oneshopinfo['perpay']) else 0 # shop_score = oneshopinfo['score'] if not pd.isnull(oneshopinfo['score']) else 0 # shop_comment = oneshopinfo['comment'] if not pd.isnull(oneshopinfo['comment']) else 0 # shop_level = oneshopinfo['level'] if not pd.isnull(oneshopinfo['level']) else 0 # shop_cate1 = oneshopinfo['cate1'] # import warnings # with warnings.catch_warnings(): # warnings.simplefilter("ignore",category=DeprecationWarning) # shop_cate1_encoder = hot_encoder.transform([dicts[shop_cate1]]).toarray() # x = np.insert(x,x.shape[1],shop_perpay,axis=1) # x = np.insert(x,x.shape[1],shop_score,axis=1) # x = np.insert(x,x.shape[1],shop_comment,axis=1) # x = np.insert(x,x.shape[1],shop_level,axis=1) # for i in range(shop_cate1_encoder.shape[1]): # x = np.insert(x,x.shape[1],shop_cate1_encoder[0][i],axis=1) # '''shop info features appended''' x = x_scaler.transform(x) # for j in range(sameday_backNum): # x.append(train_y[len(train_y) - (j+1)*7][0]) # x = np.array(x).reshape((1, sameday_backNum)) # print x # x = x.reshape(1, sameday_backNum, 1) predict = model.predict(x) if predict.ndim == 2: predict = y_scaler.inverse_transform(predict)[0][0] elif predict.ndim == 1: predict = y_scaler.inverse_transform(predict)[0] if (predict <= 0): predict = 1 preficts.append(predict) part_data.set_value(part_data.shape[0] - 1, "count", predict) preficts = (removeNegetive(toInt(np.array(preficts)))).astype(int) if preficts_all is None: preficts_all = preficts else: preficts_all = np.insert(preficts_all, preficts_all.shape[0], preficts, axis=0) if trainAsTest: last_14_real_y = (removeNegetive(toInt( np.array(last_14_real_y)))).astype(int) if real_all is None: real_all = last_14_real_y else: real_all = np.insert(real_all, real_all.shape[0], last_14_real_y, axis=0) # print preficts,last_14_real_y print str(j) + ',score:', scoreoneshop(preficts, last_14_real_y) # preficts = np.array(preficts) preficts_all = preficts_all.reshape((len(shopids) - ignores, 14)) if trainAsTest: real_all = real_all.reshape((len(shopids) - ignores, 14)) preficts_all = np.concatenate((preficts_all, real_all), axis=1) shopids = shopids.tolist() for remove in Parameter.ignore_shopids: try: shopids.remove(remove) except: pass preficts_all = np.insert(preficts_all, 0, shopids, axis=1) if saveFilePath is not None: path = saveFilePath + "_%ds_%dd_%df_%d_%s_%d_%d_%d_%s_%dtime.csv" \ % (sameday_backNum, day_back_num, train_x.shape[1],cate_level,cate_name ,rnn_epoch,batch_size,h_unit,h1_activation,time) print "save in :", path
np.savetxt(path, preficts_all, fmt="%d", delimiter=",") return preficts_all
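# A minimal, self-contained sketch (illustration only) of the MinMaxScaler round trip used above: targets are scaled
# to [0, 1] before fitting, and every prediction is mapped back to the raw count scale with inverse_transform before
# it is rounded, scored and written out.
import numpy as np
from sklearn.preprocessing import MinMaxScaler

y_demo = np.array([[10.0], [50.0], [200.0]])             # stand-in for train_y
y_scaler_demo = MinMaxScaler().fit(y_demo)
y_scaled = y_scaler_demo.transform(y_demo)               # what the network is trained on
y_restored = y_scaler_demo.inverse_transform(y_scaled)   # back to customer counts
assert np.allclose(y_restored, y_demo)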
def CreateModel(self): ''' Define the CNN/LSTM/CTC model using the Keras functional API. Input layer: a sequence of 39-dimensional feature vectors; the maximum length of one utterance is set to 1500 (about 15 s). Hidden layer 1: convolutional layer with 1024 neurons. Hidden layer 2: pooling layer with a pooling window of size 2. Hidden layer 3: Dropout layer dropping 20% of the connections to prevent overfitting. Hidden layer 4: recurrent LSTM layer. Hidden layer 5: Dropout layer dropping 20% of the connections to prevent overfitting. Hidden layer 6: fully connected layer with self.MS_OUTPUT_SIZE neurons and softmax activation. Output layer: a custom CTC layer that uses the CTC loss as the loss function. Still unfinished; the network architecture may need further changes. ''' # Each frame is represented by 13 MFCC features plus their 13 first-order and 13 second-order deltas; the maximum signal sequence length is 1500 input_data = Input(name='the_input', shape=(self.AUDIO_LENGTH, self.AUDIO_FEATURE_LENGTH)) layer_h1_c = Conv1D(filters=256, kernel_size=5, strides=1, use_bias=True, padding="valid")(input_data) # convolutional layer #layer_h1_a = Activation('relu', name='relu0')(layer_h1_c) layer_h1_a = LeakyReLU(alpha=0.3)(layer_h1_c) # advanced activation layer layer_h1 = MaxPooling1D(pool_size=2, strides=None, padding="valid")(layer_h1_a) # pooling layer layer_h2 = BatchNormalization()(layer_h1) layer_h3_c = Conv1D(filters=256, kernel_size=5, strides=1, use_bias=True, padding="valid")(layer_h2) # convolutional layer layer_h3_a = LeakyReLU(alpha=0.3)(layer_h3_c) # advanced activation layer #layer_h3_a = Activation('relu', name='relu1')(layer_h3_c) layer_h3 = MaxPooling1D(pool_size=2, strides=None, padding="valid")(layer_h3_a) # pooling layer layer_h4 = Dropout(0.1)(layer_h3) # randomly drop part of the connections to prevent overfitting layer_h5 = Dense(256, use_bias=True, activation="softmax")(layer_h4) # fully connected layer layer_h6 = Dense(256, use_bias=True, activation="softmax")(layer_h5) # fully connected layer #layer_h4 = Activation('softmax', name='softmax0')(layer_h4_d1) layer_h7 = LSTM(256, activation='softmax', use_bias=True, return_sequences=True)(layer_h6) # LSTM layer layer_h8 = LSTM(256, activation='softmax', use_bias=True, return_sequences=True)(layer_h7) # LSTM layer layer_h9 = LSTM(256, activation='softmax', use_bias=True, return_sequences=True)(layer_h8) # LSTM layer layer_h10 = LSTM(256, activation='softmax', use_bias=True, return_sequences=True)(layer_h9) # LSTM layer #layer_h10 = Activation('softmax', name='softmax1')(layer_h9) layer_h10_dropout = Dropout(0.1)(layer_h10) # randomly drop part of the connections to prevent overfitting layer_h11 = Dense(512, use_bias=True, activation="softmax")(layer_h10_dropout) # fully connected layer layer_h12 = Dense(self.MS_OUTPUT_SIZE, use_bias=True, activation="softmax")(layer_h11) # fully connected layer #layer_h6 = Dense(1283, activation="softmax")(layer_h5) # fully connected layer y_pred = Activation('softmax', name='softmax2')(layer_h12) model_data = Model(inputs=input_data, outputs=y_pred) #model_data.summary() #labels = Input(name='the_labels', shape=[60], dtype='float32') labels = Input(name='the_labels', shape=[self.label_max_string_length], dtype='float32') input_length = Input(name='input_length', shape=[1], dtype='int64') label_length = Input(name='label_length', shape=[1], dtype='int64') # Keras doesn't currently support loss funcs with extra parameters # so CTC loss is implemented in a lambda layer #layer_out = Lambda(ctc_lambda_func,output_shape=(self.MS_OUTPUT_SIZE, ), name='ctc')([y_pred, labels, input_length, label_length])#(layer_h6) # CTC loss_out = Lambda(self.ctc_lambda_func, output_shape=(1, ), name='ctc')( [y_pred, labels, input_length, label_length]) #top_k_decoded, _ = K.ctc_decode(y_pred, input_length) #self.decoder = K.function([input_data, input_length], [top_k_decoded[0]]) #y_out = Activation('softmax', name='softmax3')(loss_out) model = Model(inputs=[input_data, labels, input_length, label_length], outputs=loss_out) model.summary() # clipnorm seems to speed up convergence #sgd = SGD(lr=0.0001, decay=1e-8, momentum=0.9, nesterov=True, clipnorm=5) ada_d = Adadelta(lr=0.001, rho=0.95, epsilon=1e-06) #model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer = sgd, metrics=['accuracy']) #model.compile(loss={'ctc':
lambda y_true, y_pred: y_pred}, optimizer = ada_d, metrics=['accuracy']) model.compile(loss={ 'ctc': lambda y_true, y_pred: y_pred }, optimizer=ada_d, metrics=['accuracy', self.ctc_cost]) # captures output of softmax so we can decode the output during visualization self.test_func = K.function([input_data], [y_pred]) self.test_func_input_length = K.function([input_length], [input_length]) print('[*Note] Model created successfully, model compiled successfully') return model
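# self.ctc_lambda_func is referenced above but not shown in this excerpt. The usual definition (a sketch under that
# assumption, written as a plain function here rather than a method) wraps keras.backend.ctc_batch_cost so the CTC
# loss can be computed inside the Lambda layer:
from keras import backend as K

def ctc_lambda_func(args):
    y_pred, labels, input_length, label_length = args
    # ctc_batch_cost expects (labels, y_pred, input_length, label_length)
    return K.ctc_batch_cost(labels, y_pred, input_length, label_length)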
batch_size = 128 train_datagen = ImageDataGenerator(rescale=1. / 255, shear_range=0.2, zoom_range=0.2, horizontal_flip=True) validation_datagen = ImageDataGenerator(rescale=1. / 255) train_generator = train_datagen.flow_from_directory(train_data_dir, target_size=(224, 224), batch_size=batch_size, class_mode='categorical') validation_generator = validation_datagen.flow_from_directory( val_data_dir, target_size=(224, 224), batch_size=batch_size, class_mode='categorical') num_classes = 2 epochs = 20 input_shape = (224, 224, 3) model = ResnetBuilder.build_resnet_18(input_shape, num_classes) model.compile(loss='categorical_crossentropy', optimizer=Adadelta(), metrics=['accuracy']) model.fit_generator(train_generator, steps_per_epoch=20059 / batch_size, epochs=epochs, validation_data=validation_generator, validation_steps=5040 / batch_size)
backwards = Dropout(0.25)(backwards) forwards = LSTM(hidden_dim)(forwards) forwards = Dropout(0.25)(forwards) backwards = LSTM(hidden_dim, go_backwards=True)(backwards) backwards = Dropout(0.25)(backwards) merged = concatenate([forwards, backwards], axis=-1) lstm = Dropout(0.25)(merged) output = Dense(1, activation='linear')(lstm) model = Model(inputs=sequence, outputs=output) optimizer = Adadelta(lr=1.0) model.compile(loss='mean_squared_error', optimizer=optimizer, metrics=['mae', keras_pearsonr]) model.fit(X_train, y_train, validation_data=[X_test, y_test], batch_size=batch_size, epochs=nb_epoch, verbose=2) y_pred = model.predict(X_test, batch_size=batch_size).flatten() # print(y_pred) mse = mean_squared_error(y_test, y_pred) mae = mean_absolute_error(y_test, y_pred)
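# keras_pearsonr is used as a metric above but not defined in this excerpt. A minimal backend implementation of the
# Pearson correlation coefficient (an assumption about what the original helper computes):
from keras import backend as K

def keras_pearsonr(y_true, y_pred):
    x = y_true - K.mean(y_true)
    y = y_pred - K.mean(y_pred)
    return K.sum(x * y) / (K.sqrt(K.sum(K.square(x)) * K.sum(K.square(y))) + K.epsilon())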
from diary import Diary import matplotlib.pyplot as plt plt.ion() plt.rcParams['image.cmap'] = 'gray' plt.rcParams['figure.figsize'] = (5,3.5) np.random.seed(1234) _EPSILON=10e-8 PATH_SAVE='datasets/mnist/' binarize=False add_noise=False #optimizer = SGD(lr=0.5, decay=1e-1, momentum=0.9, nesterov=False) optimizer = Adadelta(lr=1.0, rho=0.95, epsilon=1e-06) #optimizer = RMSprop() #optimizer = Adagrad(lr=1.0, epsilon=1e-06) #optimizer = Adamax(lr=0.002, beta_1=0.9, beta_2=0.999, epsilon=1e-08) train_size=50000 num_epochs=30 batch_size=5000 inner_batch_size=5000 nb_classes=2 noise_proportion=0.25 score_lin=np.linspace(0,1,100) minibatch_method='lineal' # 'random', 'lineal' n_hidden=[25, 25] output_activation= 'sigmoid' # 'isotonic_regression' # sigmoid if nb_classes == 2:
model.add(Conv2D(128, (3, 3), padding='same', activation=None, use_bias=True)) model.add(BatchNormalization()) model.add(Spiking_BRelu()) model.add(Conv2D(128, (3, 3), padding='same', activation=None, use_bias=True)) model.add(BatchNormalization()) model.add(Spiking_BRelu()) model.add(Flatten()) model.add(Dense(256, activation=None, use_bias=True)) model.add(BatchNormalization()) model.add(Spiking_BRelu()) model.add(Dense(40, activation=None, use_bias=True)) model.add(BatchNormalization()) model.add(Spiking_BRelu()) model.add(Softmax_Decode(key)) adaptive = AdaptiveSharpener(verbose=True, min_init_epochs=1) max_epochs = 1 # Stop training if the model isn't fully sharpened after max_epochs epochs. model.compile(loss='categorical_crossentropy', optimizer=Adadelta(lr=4.0, rho=0.95, epsilon=1e-8, decay=0.0), metrics=['accuracy']) model.fit(x_train, y_train, epochs=max_epochs, callbacks=[adaptive]) new_model = copy_remove_batchnorm(model) # Test both the original and the "copy" and compare their accuracy. score = model.evaluate(x_test, y_test)[1] score_new = new_model.evaluate(x_test, y_test)[1] print('score with batchnorm =', score) print('score after removing batchnorm =', score_new) print('They should be the same.')