from typing import Optional

from tensorflow.keras import optimizers


def make_optimizer(name: str, lr: Optional[float], clipnorm: float) -> optimizers.Optimizer:
    # Map an optimizer name to a configured instance, with per-optimizer default learning rates.
    if name == 'sgd':
        lr = lr or 0.01
        return optimizers.SGD(lr=lr, clipnorm=clipnorm)
    elif name == 'adagrad':
        lr = lr or 0.01
        return optimizers.Adagrad(lr=lr, clipnorm=clipnorm)
    elif name == 'adam':
        lr = lr or 0.001
        return optimizers.Adam(lr=lr, clipnorm=clipnorm)
    elif name == 'adamax':
        lr = lr or 0.001
        return optimizers.Adamax(lr=lr, clipnorm=clipnorm)
    elif name == 'nadam':
        lr = lr or 0.001
        return optimizers.Nadam(lr=lr, clipnorm=clipnorm)
    else:
        raise NotImplementedError("Unknown optimizer name: " + name)
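A minimal usage sketch for the factory above, not part of the original snippet; it assumes `make_optimizer` is in scope and that `model` is an existing, uncompiled Keras model.

# Hypothetical usage; `model` is assumed to exist elsewhere.
opt = make_optimizer('adam', lr=None, clipnorm=1.0)  # lr=None falls back to the 0.001 default
model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])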
def setOptimizer(self, config):
    configOptimizer = config["model"]["optimizer"].lower()
    if configOptimizer == "Adadelta".lower():
        self.optimizer = optimizers.Adadelta()
    elif configOptimizer == "Adagrad".lower():
        self.optimizer = optimizers.Adagrad()
    elif configOptimizer == "Adamax".lower():
        self.optimizer = optimizers.Adamax()
    elif configOptimizer == "Ftrl".lower():
        self.optimizer = optimizers.Ftrl()
    elif configOptimizer == "SGD".lower():
        self.optimizer = optimizers.SGD()
    elif configOptimizer == "Nadam".lower():
        self.optimizer = optimizers.Nadam()
    elif configOptimizer == "Optimizer".lower():
        # Note: optimizers.Optimizer is the abstract base class; instantiating it
        # directly will fail. Adam is also not handled, and unknown names fall
        # through silently, leaving self.optimizer unset.
        self.optimizer = optimizers.Optimizer()
    elif configOptimizer == "RMSprop".lower():
        self.optimizer = optimizers.RMSprop()
def get_optimizer():
    # Pick a random optimizer name and record it for later reporting.
    optimizer_name = optimizer_names[random.randint(0, len(optimizer_names) - 1)]
    model_attributes.optimizer_name = optimizer_name
    if optimizer_name == 'SGD':
        return optimizers.SGD(lr=get_learning_rate())
    elif optimizer_name == 'RMSprop':
        return optimizers.RMSprop(lr=get_learning_rate())
    elif optimizer_name == 'Adagrad':
        return optimizers.Adagrad(lr=get_learning_rate())
    elif optimizer_name == 'Adadelta':
        return optimizers.Adadelta(lr=get_learning_rate())
    elif optimizer_name == 'Adam':
        return optimizers.Adam(lr=get_learning_rate())
    elif optimizer_name == 'Adamax':
        return optimizers.Adamax(lr=get_learning_rate())
    elif optimizer_name == 'Nadam':
        return optimizers.Nadam(lr=get_learning_rate())
    return None
def train(self, trn_C_list, trn_S_list, trn_batch,
          val_C_list, val_S_list, val_batch,
          max_epochs, early_stopping):  # {{{
    opt = optimizers.Adamax(lr=0.001)
    self.model.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])

    cb_checkpoint = callbacks.ModelCheckpoint(
        self.model_dir + "/" + self.model_name + '-{epoch:03d}-{accuracy:.4f}-{val_accuracy:.4f}.h5',
        save_best_only=True, monitor='val_accuracy', mode='max')
    cb_checkpoint_best = callbacks.ModelCheckpoint(
        self.model_dir + "/" + self.model_name + '-best.h5',
        save_best_only=True, monitor='val_accuracy', mode='max')
    cb_earlystopping = callbacks.EarlyStopping(
        monitor='val_accuracy', mode='max', verbose=2, patience=early_stopping)
    callbacks_list = [cb_checkpoint, cb_checkpoint_best, cb_earlystopping]

    steps_train = int((len(trn_C_list) + len(trn_S_list)) / trn_batch)
    g_train = self.train_generator(trn_C_list, trn_S_list, trn_batch)
    # steps_train = 100
    steps_valid = int((len(val_C_list) + len(val_S_list)) / val_batch)
    g_valid = self.valid_generator(val_C_list, val_S_list, val_batch)

    self.model.fit(g_train,
                   steps_per_epoch=steps_train,
                   validation_data=g_valid,
                   validation_steps=steps_valid,
                   callbacks=callbacks_list,
                   epochs=max_epochs)
def create_optimizer(optimizer, learning_rate):
    """
    Simply returns an optimizer based on the string referring to it.
    :param optimizer: Name of the optimizer
    :param learning_rate: learning rate of the optimizer
    :return: tensorflow.keras.optimizers instance (None if the name is not recognised).
    """
    if optimizer.lower() == "adadelta":
        return optimizers.Adadelta(lr=learning_rate)
    elif optimizer.lower() == "adagrad":
        return optimizers.Adagrad(lr=learning_rate)
    elif optimizer.lower() == "adam":
        return optimizers.Adam(lr=learning_rate)
    elif optimizer.lower() == "adamax":
        return optimizers.Adamax(lr=learning_rate)
    elif optimizer.lower() == "nadam":
        return optimizers.Nadam(lr=learning_rate)
    elif optimizer.lower() == "rmsprop":
        return optimizers.RMSprop(lr=learning_rate)
    elif optimizer.lower() == "sgd":
        return optimizers.SGD(lr=learning_rate)
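A possible alternative to the hand-written dispatch above, sketched here as an assumption rather than part of the original code: `tf.keras.optimizers.get` resolves a string identifier to an optimizer with default settings, and the learning rate can then be overridden (exact attribute behaviour may vary across TF/Keras versions). The helper name is hypothetical.

from tensorflow.keras import optimizers


def create_optimizer_via_get(name, learning_rate):
    # Resolve 'adam', 'sgd', 'rmsprop', ... to an optimizer instance with defaults.
    opt = optimizers.get(name)
    # Override the default learning rate; settable on TF 2.x optimizers.
    opt.learning_rate = learning_rate
    return opt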
def get_optimizer(self, optimizer, learning_rate):
    if optimizer == 'adam':
        opt = optimizers.Adam(lr=learning_rate)
    elif optimizer == 'adamax':
        opt = optimizers.Adamax(lr=learning_rate)
    elif optimizer == 'radam':
        opt = tfa.optimizers.RectifiedAdam(learning_rate=learning_rate)
    elif optimizer == 'sgd':
        # Alternative scale functions:
        # lambda x: 1.
        # lambda x: gamma ** x
        # lambda x: 1 / (2.0 ** (x - 1))
        lr_schedule = tfa.optimizers.cyclical_learning_rate.CyclicalLearningRate(
            initial_learning_rate=learning_rate,
            maximal_learning_rate=100 * learning_rate,
            step_size=25,
            scale_mode="iterations",
            scale_fn=lambda x: 0.95 ** x,
            name="CustomScheduler")
        opt = optimizers.SGD(learning_rate=lr_schedule)
    else:
        raise ValueError('Undefined OPTIMIZER_TYPE!')
    return opt
def chexnet_model(FLAGS):
    """Builds the chexnet model using specifics from FLAGS. Returns a compiled model."""
    base_model = DenseNet121(include_top=False,
                             weights='imagenet',
                             input_shape=(FLAGS.image_size, FLAGS.image_size, 3))
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    predictions = Dense(14, activation='sigmoid', bias_initializer='ones')(x)
    model = Model(inputs=base_model.input, outputs=predictions)

    if FLAGS.opt == 'adam':
        opt = optimizers.Adam(lr=FLAGS.lr)
    elif FLAGS.opt == 'sgd':
        opt = optimizers.SGD(lr=FLAGS.lr, momentum=FLAGS.momentum, nesterov=FLAGS.nesterov)
    elif FLAGS.opt == 'rmsprop':
        opt = optimizers.RMSprop(lr=FLAGS.lr)  # Keras exposes RMSprop, not RMSProp
    elif FLAGS.opt == 'adagrad':
        opt = optimizers.Adagrad(lr=FLAGS.lr)
    elif FLAGS.opt == 'adadelta':
        opt = optimizers.Adadelta(lr=FLAGS.lr)
    elif FLAGS.opt == 'adamax':
        opt = optimizers.Adamax(lr=FLAGS.lr)
    elif FLAGS.opt == 'nadam':
        opt = optimizers.Nadam(lr=FLAGS.lr)
    else:
        print("No optimizer selected. Using Adam.")
        opt = optimizers.Adam(lr=FLAGS.lr)

    # Wrap the optimizer for Horovod distributed training.
    hvd_opt = hvd.DistributedOptimizer(opt)

    model.compile(loss='binary_crossentropy', optimizer=hvd_opt, metrics=['accuracy'])
    return model
def get_optimizer(optimization_function, learning_rate):
    if optimization_function == "Adam":
        optimization_function = Optimizer.Adam(learning_rate=learning_rate)
    elif optimization_function == "SGD":
        optimization_function = Optimizer.SGD(learning_rate=learning_rate)
    elif optimization_function == "RMSprop":
        optimization_function = Optimizer.RMSprop(learning_rate=learning_rate)
    elif optimization_function == "Adagrad":
        optimization_function = Optimizer.Adagrad(learning_rate=learning_rate)
    elif optimization_function == "Adadelta":
        optimization_function = Optimizer.Adadelta(learning_rate=learning_rate)
    elif optimization_function == "Adamax":
        optimization_function = Optimizer.Adamax(learning_rate=learning_rate)
    elif optimization_function == "Nadam":
        optimization_function = Optimizer.Nadam(learning_rate=learning_rate)
    return optimization_function
def evaluate_lstm_model(X):
    # Note: pd.datetime was removed in pandas 1.0; datetime.datetime.fromtimestamp
    # or pd.to_datetime(X.index, unit='ms') is the modern equivalent.
    dt = X.index.map(lambda a: pd.datetime.fromtimestamp(a / 1000))
    df_final = pd.DataFrame({'y': X, 'ds': dt})
    df_final = df_final.reset_index(drop=True)

    # prepare training and test dataset
    train_size = int(len(df_final) * 0.66)
    n_steps = 144
    # X, y = split_sequence(np.array(df_final['y']), n_steps)
    X = np.array(df_final['y'][:-1]).reshape(-1, 1)
    y = np.array(df_final['y'].shift(-1)[:-1])
    X_train, X_test = X[:train_size], X[train_size:]
    y_train, y_test = y[:train_size], y[train_size:]

    # Scaler
    scaler = StandardScaler()
    scaled_train = scaler.fit_transform(X_train)
    # scaled_train = scaled_train.reshape((scaled_train.shape[0], scaled_train.shape[1], 1))

    # Model construction
    # Note: the Input layer expects (n_steps, 1) windows, but scaled_train is fed
    # without the commented-out reshape above; the shapes should be reconciled.
    model = Sequential()
    model.add(Input(shape=(n_steps, 1), name='Input_y'))
    model.add(CuDNNLSTM(128, return_sequences=True, name='LSTM1'))
    model.add(Dropout(0.1, name='Dropout1'))
    model.add(CuDNNLSTM(128, return_sequences=True, name='LSTM2'))
    model.add(Dropout(0.1, name='Dropout2'))
    model.add(Dense(1, name='Output'))

    # Model compiling
    opt = optimizers.Adamax(learning_rate=0.001)
    model.compile(loss='mean_squared_error', optimizer=opt)

    # Training
    epochs = 100
    model.fit(scaled_train, y_train, epochs=epochs, batch_size=100, verbose=2)

    # make predictions
    scaled_test = scaler.transform(X_test)
    predictions = model.predict(
        scaled_test.reshape(scaled_test.shape[0], scaled_test.shape[1], 1))
    forecast = [predictions[i][0][0] for i in range(0, len(predictions))]

    # Plot lines
    plt.figure(figsize=(25, 20))
    plt.plot(y_test, label="Y")
    plt.plot(forecast, label="Yhat")
    plt.legend()
    plt.show()

    # calculate out of sample error
    error = mean_absolute_percentage_error(y_test, forecast)
    return error
test_len = test_label.shape[0]
all_label = np.concatenate((train_label, test_label))
train_onehot, test_labels = encode_onehot(all_label, train_len)

# Test pretrained model
model = resnet(train_feature_.shape)

# Optimizers
sgd = opt.SGD(lr=0.01, momentum=0.5, nesterov=False)
adam = opt.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, amsgrad=False)
rms_prop = opt.RMSprop(lr=0.01, rho=0.9)
adagrad = opt.Adagrad(lr=0.01)
adadelta = opt.Adadelta(lr=1.0, rho=0.95)
adamax = opt.Adamax(lr=0.002, beta_1=0.9, beta_2=0.999)
nadam = opt.Nadam(lr=0.002, beta_1=0.9, beta_2=0.999)

model.compile(optimizer=adam, loss="categorical_crossentropy")
# print(model.summary())

print("ResNet-101")
train_start = time.time()
model.fit(
    train_feature_,
    train_onehot.toarray(),
    batch_size=64,
    # batch_size=1775,
    epochs=500,
)
def funcc(x):
    x = asarray_chkfinite(x)
    size = len(x) - 6
    extra1 = x[size]
    epoc = int(extra1)
    extra2 = x[size + 1]
    extra3 = x[size + 2]
    extra5 = x[size + 3]
    extra6 = x[size + 4]
    extra7 = x[size + 5]
    OP = int(extra7)
    LO = int(extra3)
    batchzi = int(extra2)
    alp = (extra1 - int(extra1))

    losses = [' '] * 16
    losses[0] = "mean_squared_error"
    losses[1] = "mean_absolute_error"
    losses[2] = "mean_absolute_percentage_error"
    losses[3] = "mean_squared_logarithmic_error"
    losses[4] = "squared_hinge"
    losses[5] = 'hinge'
    losses[6] = 'categorical_hinge'
    losses[7] = 'logcosh'
    losses[8] = 'huber_loss'
    losses[9] = 'sparse_categorical_crossentropy'
    losses[10] = 'binary_crossentropy'
    losses[11] = 'kullback_leibler_divergence'
    losses[12] = 'poisson'

    var1 = extra5
    var2 = extra6
    learningrate = var2 - int(var2)
    acti = Activation('linear')
    opti = optimizers.SGD(lr=(learningrate * .1), decay=0, momentum=0.0, nesterov=False)
    if OP == 0:
        opti = optimizers.SGD(learning_rate=0.01, momentum=0.0, nesterov=False)
    elif OP == 1:
        opti = optimizers.SGD(learning_rate=learningrate, momentum=0.0, nesterov=False)
    elif OP == 2:
        opti = optimizers.SGD(learning_rate=learningrate, momentum=0.0, nesterov=True)
    elif OP == 3:
        opti = optimizers.RMSprop(learning_rate=0.001, rho=0.9)
    elif OP == 4:
        opti = optimizers.RMSprop(learning_rate=learningrate, rho=0.9)
    elif OP == 5:
        opti = optimizers.Adagrad(learning_rate=0.01)
    elif OP == 7:
        opti = optimizers.Adagrad(learning_rate=learningrate)
    elif OP == 8:
        opti = optimizers.Adadelta(learning_rate=1.0, rho=0.95)
    elif OP == 9:
        opti = optimizers.Adadelta(learning_rate=learningrate, rho=0.95)
    elif OP == 10:
        opti = optimizers.Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, amsgrad=False)
    elif OP == 11:
        opti = optimizers.Adam(learning_rate=learningrate, beta_1=0.9, beta_2=0.999, amsgrad=False)
    elif OP == 12:
        opti = optimizers.Adam(learning_rate=learningrate, beta_1=var1, beta_2=var2, amsgrad=False)
    elif OP == 13:
        opti = optimizers.Adamax(learning_rate=0.002, beta_1=0.9, beta_2=0.999)
    elif OP == 14:
        opti = optimizers.Adamax(learning_rate=learningrate, beta_1=0.9, beta_2=0.999)
    elif OP == 15:
        opti = optimizers.Adamax(learning_rate=learningrate, beta_1=var1, beta_2=var2)
    elif OP == 16:
        opti = optimizers.Nadam(learning_rate=0.002, beta_1=0.9, beta_2=0.999)
    elif OP == 17:
        opti = optimizers.Nadam(learning_rate=learningrate, beta_1=0.9, beta_2=0.999)
    elif OP == 18:
        opti = optimizers.Nadam(learning_rate=learningrate, beta_1=var1, beta_2=var2)
    else:
        opti = optimizers.SGD(learning_rate=0.01, momentum=0.0, nesterov=False)

    if math.isnan(max(x)):
        x = np.full((size + 1, 1), 1)
    # n = x[size] + 1
    leak = [None] * size
    for i in range(size):
        leak[i] = (x[i] - int(x[i])) * .3
    leaky = np.mean(leak)
    leaky = leaky - int(leaky)

    nn_length = 0
    # determine length of nn
    for l in range(size):
        if x[l] >= 1:
            nn_length = nn_length + 1
    nn_create = [None] * nn_length
    nn_create_leak = [None] * nn_length
    nn_length = 0
    for cr in range(size):
        if x[cr] >= 1:
            nn_create[nn_length] = int(x[cr])
            nn_create_leak[nn_length] = leak[cr]
            nn_length = nn_length + 1

    # create training and testing data
    # first seed random number generator with system time
    seed(5)
    # initialize training and testing arrays
    # testing_range = 250
    # training_range = batchzi
    # f_testing_inputs = np.zeros((testing_range, 2))
    # f_testing_outputs = np.zeros((testing_range, 1))

    # training data
    global f_training_inputs
    global f_training_outputs
    ff_training_inputs = f_training_inputs[:batchzi]
    ff_training_outputs = f_training_outputs[:batchzi]
    global fr_look

    # testing data
    global testing_range
    global f_testing_inputs
    global f_testing_outputs
    global f_look

    # set random seed
    seed(1)

    # create neural network
    model = Sequential()
    # the first layer
    # model.add(Dense(2, input_dim=2, activation=acti, kernel_initializer=tf.keras.initializers.glorot_normal(seed=0)))
    # model.add(acti)
    # intermediate layers if any
    nn_length = nn_length
    for k in range(nn_length):
        v1 = nn_create_leak[k]
        v2 = nn_create_leak[k]
        V1 = v1 * 10000
        V2 = (V1 - int(V1)) * 10000
        A1 = int(V1) / 10000
        A2 = int(V2) / 10000
        acti = tf.keras.layers.ReLU(max_value=None, negative_slope=A1, threshold=A2)
        if k == 0:
            model.add(
                Dense(nn_create[k],
                      input_dim=2,
                      activation=acti,
                      kernel_initializer=tf.keras.initializers.glorot_normal(seed=0)))
        else:
            model.add(
                Dense(nn_create[k],
                      activation=acti,
                      kernel_initializer=tf.keras.initializers.glorot_normal(seed=k + 1)))
    # the last layer
    model.add(
        Dense(1,
              activation='linear',
              kernel_initializer=tf.keras.initializers.glorot_normal(seed=nn_length + 1)))
    # model.add(acti)

    # compile network
    adadelta = optimizers.Adadelta()
    adam = optimizers.Adam()
    passloss = losses[LO]
    sgd = optimizers.SGD(lr=(learningrate * .1), decay=0, momentum=0.0, nesterov=False)
    rprop = optimizers.RMSprop()
    model.compile(loss=passloss, optimizer=opti, metrics=['accuracy'])

    # train model
    model.fit(ff_training_inputs, ff_training_outputs, epochs=epoc, batch_size=batchzi, verbose=0)
    weights = model.get_weights()
    arc1 = model.to_yaml()

    # feed testing data through neural networks
    f_nn_out = model.predict(f_testing_inputs)
    if np.isnan(f_nn_out).any():
        return float("inf")
    insf = np.isneginf(f_nn_out)
    in1 = np.any(insf)
    insf = np.isposinf(f_nn_out)
    in1 = np.any(insf)
    in2 = np.any(insf)
    if in1 or in2:
        return float("inf")

    err = np.zeros((testing_range, 1))
    # from the testing nn outputs compute the average and max error
    f_error = (abs(f_nn_out[0] - f_testing_outputs[0]) / f_testing_outputs[0]) * 100
    f_nn_error_avg = abs(f_error)
    f_nn_error_max = abs(f_error)
    f_nn_error_min = abs(f_error)
    for c in range(1, testing_range):
        f_error = (abs(f_nn_out[c] - f_testing_outputs[c]) / f_testing_outputs[c]) * 100
        f_nn_error_avg = abs((f_error + f_nn_error_avg) * .5)
        f_nn_error_max = abs(max(f_error, f_nn_error_max))
        f_nn_error_min = abs(min(f_error, f_nn_error_min))

    # take the average of the average and max and returns as fitness
    # return as percent error
    # return 100 if nn died
    if f_nn_out.max() == 0:
        f_fitness = float("inf")
    else:
        f_fitness = abs(((f_nn_error_avg + f_nn_error_max) * .5))
    if math.isnan(f_fitness):
        f_fitness = float("inf")
    if f_fitness == float("-inf"):
        f_fitness = float("inf")

    global mun
    global countt
    f_fitness = model.evaluate(f_testing_inputs, f_testing_outputs, batch_size=batchzi, verbose=0)
    f_fitness = max(f_fitness)
    f_fitness = (2 * f_fitness + f_nn_error_max) / 3
    # f_fitness = f_fitness * f_nn_error_max
    # f_fitness = f_fitness * f_nn_error_max*100
    if f_fitness < .0000000000000000000000000000000000001:
        a_a = float(f_fitness)
        b_b = a_a.__round__(5)
        c_c = str(b_b)
        d_d = c_c.replace(".", "DOT")
        name1 = d_d
        countt = countt + 1
        name2 = 'Weights_model.h5'
        name4 = 'archi.txt'
        name3 = name1 + name2
        name5 = name1 + name4
        # file_k = open(name5, "w+")
        # model.save_weights(name3)
        # file_k.write(arc1)
        # file_k.close()
    if f_fitness < mun:
        now = datetime.datetime.now()
        mun = f_fitness
        file1 = open("progress.txt", "a")
        file1.write(str(x))
        file1.write(" avg:")
        file1.write(str(f_nn_error_avg))
        file1.write(" max:")
        file1.write(str(f_nn_error_max))
        file1.write(" fitness:")
        file1.write(str(f_fitness))
        file1.write(" completed:")
        file1.write(str(now))
        file1.write("\n")
        file1.close()
    del model
    return f_fitness
def parameter_update(theta_0, ln_q, ln_1_q, ln_s, mu, sigma, n_u, n_y, jitter,
                     sample_size_w=4096, batch_size=None, optimizer_choice='adam',
                     lr=1e-3, max_batch=int(1024), factr=1e-8, plot_loss=True):
    batch_L = []
    if optimizer_choice == 'adam':
        optimizer = optimizers.Adam(lr=lr)
    elif optimizer_choice == 'adadelta':
        optimizer = optimizers.Adadelta(lr=lr)
    elif optimizer_choice == 'adagrad':
        optimizer = optimizers.Adagrad(lr=lr)
    elif optimizer_choice == 'adamax':
        optimizer = optimizers.Adamax(lr=lr)
    elif optimizer_choice == 'ftrl':
        optimizer = optimizers.Ftrl(lr=lr)
    elif optimizer_choice == 'nadam':
        optimizer = optimizers.Nadam(lr=lr)
    elif optimizer_choice == 'rmsprop':
        optimizer = optimizers.RMSprop(lr=lr)
    elif optimizer_choice == 'sgd':
        optimizer = optimizers.SGD(lr=lr)
    else:
        raise ValueError('Unknown optimizer_choice: ' + optimizer_choice)

    theta = tf.Variable(theta_0)
    fin_theta = theta_0.copy()
    if batch_size is None:
        batch_size = int(numpy.floor(n_y / 2))
    batch_idx = numpy.arange(0, n_y, batch_size)
    batch_num = len(batch_idx) - 1
    converge = False
    for i in range(0, int(1e8)):
        for j in range(0, batch_num):
            raw_sample_w = tf.random.normal(
                (sample_size_w, 3 * (batch_idx[j + 1] - batch_idx[j])), dtype='float64')
            _, g_t = get_obj_g(theta,
                               ln_q[batch_idx[j]:batch_idx[j + 1]],
                               ln_1_q[batch_idx[j]:batch_idx[j + 1]],
                               ln_s[batch_idx[j]:batch_idx[j + 1]],
                               mu[batch_idx[j]:batch_idx[j + 1]],
                               sigma[batch_idx[j]:batch_idx[j + 1]],
                               n_u, (batch_idx[j + 1] - batch_idx[j]),
                               raw_sample_w, jitter)
            optimizer.apply_gradients(zip([g_t], [theta]))
            theta = theta.numpy()
            theta[:2] = numpy.abs(theta[:2])
            theta[:2][theta[:2] <= 1e-8] = 1e-8
            theta[5:8][theta[5:8] <= 1e-8] = 1e-8

            raw_sample_w = tf.random.normal((sample_size_w, 3 * numpy.shape(ln_q)[0]),
                                            dtype='float64')
            L_t = vi_obj(theta, ln_q, ln_1_q, ln_s, mu, sigma, n_u,
                         numpy.shape(ln_q)[0], raw_sample_w, jitter)
            tmp_L = (L_t.numpy() / numpy.shape(ln_q)[0])
            if len(batch_L) >= 2:
                if tmp_L < numpy.min(batch_L[:-1]):
                    fin_theta = theta.copy()
            theta = tf.Variable(theta)

            if numpy.mod(len(batch_L), 16) == 0:
                print('=============================================================================')
                print(theta[:8])
                print(theta[-6:])
                print('Batch: ' + str(len(batch_L)) + ', optimiser: ' + optimizer_choice +
                      ', Loss: ' + str(tmp_L))
                print('=============================================================================')
            batch_L.append(numpy.min(tmp_L))

            if plot_loss:
                fig = matplotlib.pyplot.figure(figsize=(16, 9))
                matplotlib.pyplot.plot(numpy.arange(0, len(batch_L)), numpy.array(batch_L))
                matplotlib.pyplot.xlabel('Batches')
                matplotlib.pyplot.ylabel('Loss')
                matplotlib.pyplot.title('Learning Rate: ' + str(lr))
                matplotlib.pyplot.grid(True)
                matplotlib.pyplot.ylim([numpy.min(batch_L), numpy.median(batch_L)])
                try:
                    fig.savefig('./' + str(n_u) + '_' + optimizer_choice + '_' + str(lr) + '.png',
                                bbox_inches='tight')
                except PermissionError:
                    pass
                except OSError:
                    pass
                matplotlib.pyplot.close(fig)

            if len(batch_L) > batch_num * 16:
                previous_opt = numpy.min(batch_L.copy()[:-batch_num * 16])
                current_opt = numpy.min(batch_L.copy()[-batch_num * 16:])
                if numpy.mod(len(batch_L), 16) == 0:
                    print('Previous And Recent Top Averaged Loss Is:')
                    print(numpy.hstack([previous_opt, current_opt]))
                if previous_opt - current_opt <= numpy.abs(previous_opt * factr):
                    converge = True
                    break
            if len(batch_L) >= max_batch:
                converge = True
                break

        per_idx = numpy.random.permutation(n_y)
        ln_q = ln_q[per_idx]
        ln_1_q = ln_1_q[per_idx]
        ln_s = ln_s[per_idx]
        mu = mu[per_idx]
        sigma = sigma[per_idx]
        if converge:
            break
    return fin_theta
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()
train_images = train_images / 255.0
test_images = test_images / 255.0
train_labels = to_categorical(train_labels)
test_labels = to_categorical(test_labels)

# Candidate optimizer configurations to compare
opts = [
    optimizers.Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, amsgrad=False),
    optimizers.Adam(learning_rate=0.002, beta_1=0.9, beta_2=0.9, amsgrad=True),
    optimizers.Adamax(learning_rate=0.001, beta_1=0.9, beta_2=0.999),
    optimizers.Adamax(learning_rate=0.002, beta_1=0.9, beta_2=0.9),
    optimizers.Nadam(learning_rate=0.001, beta_1=0.9, beta_2=0.999),
    optimizers.Nadam(learning_rate=0.002, beta_1=0.9, beta_2=0.9),
    optimizers.SGD(learning_rate=0.01, momentum=0.0, nesterov=False),
    optimizers.SGD(learning_rate=0.03, momentum=0.1, nesterov=True),
    optimizers.Adadelta(learning_rate=1.0, rho=0.95),
    optimizers.Adadelta(learning_rate=1.1, rho=0.99),
    optimizers.RMSprop(learning_rate=0.001, rho=0.9),
    optimizers.RMSprop(learning_rate=0.005, rho=0.92)
]

model = Sequential()
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dense(10, activation='softmax'))
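A sketch, not in the original snippet, of one way to compare the optimizers listed above; it rebuilds the small MNIST model for each optimizer so every run starts from fresh weights, and assumes the data variables defined above.

for opt in opts:
    m = Sequential([
        Flatten(input_shape=(28, 28)),
        Dense(128, activation='relu'),
        Dense(10, activation='softmax'),
    ])
    m.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])
    m.fit(train_images, train_labels, epochs=3, batch_size=128, verbose=0)
    _, acc = m.evaluate(test_images, test_labels, verbose=0)
    print(type(opt).__name__, round(float(acc), 4))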
def resnet_fcn(args):
    # Input shape
    input_shape = (None, None, 3)
    # input_shape = (32, 32, 3)

    # ResNet Blocks
    resent_blks = args.resnet_blks

    # Model Input layers
    input_layer = layers.Input(shape=input_shape)
    l = layers.Conv2D(32, 3)(input_layer)
    l = layers.BatchNormalization()(l)
    l = layers.Activation('relu')(l)
    l = layers.Conv2D(64, 3)(l)
    l = layers.BatchNormalization()(l)
    l = layers.Activation('relu')(l)
    # l = layers.MaxPooling2D()(l)
    l = layers.AveragePooling2D()(l)
    l = layers.Dropout(0.3)(l)

    # ResNet Blocks
    for i in range(resent_blks):
        if resent_blks <= 10:
            l = resnet_block_shallow(l, 64, 3)
        else:
            l = resnet_block_deep(l, 64, 3)
    l = layers.Dropout(0.5)(l)

    # Final Convolutions
    l = layers.Conv2D(64, 3)(l)
    l = layers.BatchNormalization()(l)
    l = layers.Activation('relu')(l)
    # l = layers.GlobalAveragePooling2D()(l)
    # l = layers.GlobalMaxPooling2D()(l)
    # l = layers.MaxPooling2D()(l)
    l = layers.AveragePooling2D()(l)
    l = layers.Dropout(0.5)(l)

    # Fully convolutional output
    l = layers.Conv2D(filters=512, kernel_size=6, strides=1)(l)
    l = layers.BatchNormalization()(l)
    # l = layers.Dropout(0.5)(l)
    l = layers.Activation('relu')(l)
    l = layers.Conv2D(args.num_classes, 1, 1)(l)
    # l = layers.GlobalMaxPooling2D()(l)
    l = layers.GlobalAveragePooling2D()(l)
    output_layer = layers.Activation('softmax')(l)

    # Final model
    model = tf.keras.Model(input_layer, output_layer)

    # Initiate optimizer
    # opt = optimizers.Adam(learning_rate=args.learning_rate_res)
    # opt = optimizers.Adamax(learning_rate=args.learning_rate_res)
    opt = optimizers.Adamax(learning_rate=lr_sched(0))

    # Compile the model (Adamax is used here, despite the boilerplate RMSprop comment
    # in the original).
    model.compile(loss='categorical_crossentropy',
                  optimizer=opt,
                  metrics=['accuracy'])
    return model
row = np.zeros(N_CLASSES)
row[label - 1] = 1
one_hot_encoded.append(row)

y_one_hot = np.array(one_hot_encoded)
X_train, Y_train = features[:TRAIN_SIZE], y_one_hot[:TRAIN_SIZE]
X_test, Y_test = features[TRAIN_SIZE: TRAIN_SIZE + TEST_SIZE], y_one_hot[TRAIN_SIZE: TRAIN_SIZE + TEST_SIZE]
X_validation, Y_validation = features[TRAIN_SIZE + TEST_SIZE:], y_one_hot[TRAIN_SIZE + TEST_SIZE:]

dataset_train = tf.data.Dataset.from_tensor_slices((X_train, Y_train)).batch(256).shuffle(buffer_size=1000)
dataset_test = tf.data.Dataset.from_tensor_slices((X_test, Y_test)).batch(256)
dataset_validation = tf.data.Dataset.from_tensor_slices((X_validation, Y_validation)).batch(256)

sgd = optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
rms = optimizers.RMSprop(lr=0.001, rho=0.9, epsilon=None, decay=0.0)
ada = optimizers.Adagrad(lr=0.001, epsilon=None, decay=0.0)
delta = optimizers.Adadelta(lr=1.0, rho=0.95, epsilon=None, decay=0.0)
adam = optimizers.Adam(lr=0.01, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False)
adamax = optimizers.Adamax(lr=0.002, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0)
nadam = optimizers.Nadam(lr=0.002, beta_1=0.9, beta_2=0.999, epsilon=None, schedule_decay=0.004)

# Note: this list shadows the imported `optimizers` module name from here on.
optimizers = [adam, sgd, rms, ada, delta, adamax, nadam]

log = compare_classifiers(optimizers, dataset_train, dataset_validation, dataset_test)
print(log)
log.to_csv('tensorflow.csv', mode='a', sep=',')
def parameter_update(theta_0, ln_q, ln_1_q, ln_s, mu, sigma, n_u, n_y, jitter,
                     sample_size_w=1024, batch_size=None, val_size=None,
                     optimizer_choice='adam', lr=1e-3, max_batch=int(4096), tol=8,
                     factr=1e-3, plot_loss=True, print_info=True):
    batch_L = []
    gap = []
    if optimizer_choice == 'adam':
        optimizer = optimizers.Adam(lr=lr)
    elif optimizer_choice == 'adadelta':
        optimizer = optimizers.Adadelta(lr=lr)
    elif optimizer_choice == 'adagrad':
        optimizer = optimizers.Adagrad(lr=lr)
    elif optimizer_choice == 'adamax':
        optimizer = optimizers.Adamax(lr=lr)
    elif optimizer_choice == 'ftrl':
        optimizer = optimizers.Ftrl(lr=lr)
    elif optimizer_choice == 'nadam':
        optimizer = optimizers.Nadam(lr=lr)
    elif optimizer_choice == 'rmsprop':
        optimizer = optimizers.RMSprop(lr=lr)
    elif optimizer_choice == 'sgd':
        optimizer = optimizers.SGD(lr=lr)
    else:
        optimizer = None

    theta = tf.Variable(theta_0)
    fin_theta = theta_0.copy()
    if val_size is not None:
        if val_size > n_y:
            val_size = n_y
            val_idx = numpy.arange(0, n_y)
        else:
            val_idx = numpy.random.choice(numpy.arange(0, n_y), val_size, replace=False)
    else:
        val_idx = None

    for i in range(0, int(1e8)):
        if batch_size is None:
            tmp_idx = numpy.arange(0, n_y)
        else:
            tmp_idx = numpy.random.choice(numpy.arange(0, n_y), batch_size, replace=False)
        raw_sample_w = tf.random.normal((sample_size_w, 3 * len(tmp_idx)), dtype='float64')
        L_t, g_t = get_obj_g(theta, ln_q[tmp_idx], ln_1_q[tmp_idx], ln_s[tmp_idx],
                             mu[tmp_idx], sigma[tmp_idx], n_u, len(tmp_idx), n_y,
                             raw_sample_w, jitter)
        optimizer.apply_gradients(zip([g_t], [theta]))
        theta = theta.numpy()
        theta[:2] = numpy.abs(theta[:2])
        theta[:2][theta[:2] <= 1e-8] = 1e-8
        theta[5:8][theta[5:8] <= 1e-8] = 1e-8

        if val_size is not None:
            if numpy.mod(i, numpy.min([numpy.floor(tol / 2), 8])) == 0:
                raw_sample_w = tf.random.normal((sample_size_w, 3 * val_size), dtype='float64')
                tmp_L_t = vi_obj(theta, ln_q[val_idx], ln_1_q[val_idx], ln_s[val_idx],
                                 mu[val_idx], sigma[val_idx], n_u, val_size, n_y,
                                 raw_sample_w, jitter)
                tmp_L = (tmp_L_t.numpy() / n_y)
        else:
            tmp_L = (L_t.numpy() / n_y)
        batch_L.append(numpy.min(tmp_L))

        if len(batch_L) >= 2:
            if tmp_L < numpy.min(batch_L[:-1]):
                fin_theta = theta.copy()
        theta = tf.Variable(theta)

        if (numpy.mod(len(batch_L), tol) == 0) & print_info:
            print('=============================================================================')
            print(theta[:8])
            print(theta[-6:])
            print('Batch: ' + str(len(batch_L)) + ', optimiser: ' + optimizer_choice +
                  ', Loss: ' + str(tmp_L))
            print('=============================================================================')

        if len(batch_L) > tol:
            previous_opt = numpy.min(batch_L.copy()[:-tol])
            current_opt = numpy.min(batch_L.copy()[-tol:])
            gap.append(previous_opt - current_opt)
            if (numpy.mod(len(batch_L), tol) == 0) & print_info:
                print('Previous And Recent Top Averaged Loss Is:')
                print(numpy.hstack([previous_opt, current_opt]))
                print('Current Improvement, Initial Improvement * factr')
                print(numpy.hstack([gap[-1], gap[0] * factr]))
            if (len(gap) >= 2) & (gap[-1] <= (gap[0] * factr)):
                print('Total batch number: ' + str(len(batch_L)))
                print('Initial Loss: ' + str(batch_L[0]))
                print('Final Loss: ' + str(numpy.min(batch_L)))
                print('Current Improvement, Initial Improvement * factr')
                print(numpy.hstack([gap[-1], gap[0] * factr]))
                break
        if len(batch_L) >= max_batch:
            break

    if plot_loss:
        fig = matplotlib.pyplot.figure(figsize=(16, 9))
        matplotlib.pyplot.plot(numpy.arange(0, len(batch_L)), numpy.array(batch_L))
        matplotlib.pyplot.xlabel('Batches')
        matplotlib.pyplot.ylabel('Loss')
        matplotlib.pyplot.title('Learning Rate: ' + str(lr))
        matplotlib.pyplot.grid(True)
        try:
            fig.savefig('./' + str(n_y) + '_' + str(n_u) + '_' + optimizer_choice + '_' + str(lr) + '.png',
                        bbox_inches='tight')
        except PermissionError:
            pass
        except OSError:
            pass
        matplotlib.pyplot.close(fig)
    return fin_theta
    # (fragment: the opening of this recurrent-layer call is missing from the source)
    input_shape=(x_train1.shape[1], x_train1.shape[2]),
    activation="tanh",
    recurrent_activation="sigmoid",
    use_bias=True,
    kernel_initializer="glorot_uniform",
    recurrent_initializer="orthogonal",
    recurrent_dropout=dropout,
    return_sequences=False))
# regressor.add(Dropout(dropout))
# regressor.add(GRU(units=h2, activation="tanh", recurrent_activation="sigmoid", use_bias=True, kernel_initializer="glorot_uniform", recurrent_initializer="orthogonal", recurrent_dropout=dropout, return_sequences=True))
# regressor.add(Dropout(dropout))
# regressor.add(GRU(units=h2, activation="tanh", recurrent_activation="sigmoid", use_bias=True, kernel_initializer="glorot_uniform", recurrent_initializer="orthogonal", recurrent_dropout=dropout, return_sequences=False))
# adamax = optimizers.Adam(lr=learning_rate)  # , beta_1=0.9, beta_2=0.99
regressor.add(Dense(units=num_classes, activation='sigmoid'))
adamax = optimizers.Adamax(lr=learning_rate)  # , beta_1=0.9, beta_2=0.99
regressor.compile(loss=tensorflow.keras.losses.categorical_crossentropy,
                  optimizer=adamax,
                  metrics=['accuracy'])
print(regressor.summary())
regressor.fit(x_train1, y_train1,
              batch_size=batch_size,
              epochs=epochs,
              verbose=1,
              callbacks=[history])
plt.plot(history.history['loss'], label='train')
def start_train(
        config,
        config_path,
        yolo_model: yolo.YOLO_Model,
        train_generator,
        valid_generator,
        dry_mode: bool):
    print('Full training')

    ###############################
    #   Optimizers
    ###############################
    optimizers = {
        'sgd': opt.SGD(lr=config['train']['learning_rate']),
        'adam': opt.Adam(lr=config['train']['learning_rate']),
        'adamax': opt.Adamax(lr=config['train']['learning_rate']),
        'nadam': opt.Nadam(lr=config['train']['learning_rate']),
        'rmsprop': opt.RMSprop(lr=config['train']['learning_rate']),
        # 'Radam': RAdam(lr=config['train']['learning_rate'], warmup_proportion=0.1, min_lr=1e-5)
    }
    optimizer = optimizers[config['train']['optimizer'].lower()]

    if config['train']['clipnorm'] > 0:
        optimizer.clipnorm = config['train']['clipnorm']

    if config['train'].get('lr_decay', 0) > 0:
        optimizer.decay = config['train']['lr_decay']

    if config['train']['optimizer'] == 'Nadam':
        # Just to set field
        optimizer.decay = 0.0

    ###############################
    #   Callbacks
    ###############################
    checkpoint_name = utils.get_checkpoint_name(config)
    utils.makedirs_4_file(checkpoint_name)

    checkpoint_vloss = cbs.CustomModelCheckpoint(
        model_to_save=yolo_model.infer_model,
        filepath=checkpoint_name,
        monitor='val_loss',
        verbose=1,
        save_best_only=True,
        mode='min',
        period=1
    )

    # tensorboard_logdir = utils.get_tensorboard_name(config)
    # utils.makedirs(tensorboard_logdir)
    # print('Tensorboard dir: {}'.format(tensorboard_logdir))
    # tensorboard_cb = TensorBoard(
    #     log_dir=tensorboard_logdir,
    #     histogram_freq=0,
    #     write_graph=False
    # )

    mAP_checkpoint_name = utils.get_mAP_checkpoint_name(config)
    mAP_checkpoint_static_name = utils.get_mAP_checkpoint_static_name(config)
    utils.makedirs_4_file(mAP_checkpoint_name)
    map_evaluator_cb = cbs.MAP_evaluation(
        model=yolo_model,
        generator=valid_generator,
        save_best=True,
        save_name=mAP_checkpoint_name,
        save_static_name=mAP_checkpoint_static_name,
        # tensorboard=tensorboard_cb,
        neptune=neptune if not dry_mode else None
    )

    reduce_on_plateau = ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.4,
        patience=20,
        verbose=1,
        mode='min',
        min_delta=0,
        cooldown=10,
        min_lr=1e-8
    )

    early_stop = EarlyStopping(
        monitor='val_loss',
        min_delta=0,
        patience=80,
        mode='min',
        verbose=1
    )

    neptune_mon = cbs.NeptuneMonitor(
        monitoring=['loss', 'val_loss'],
        neptune=neptune
    )

    # logger_cb = cbs.CustomLogger(
    #     config=config,
    #     tensorboard=tensorboard_cb
    # )

    # fps_logger = cbs.FPSLogger(
    #     infer_model=yolo_model.infer_model,
    #     generator=valid_generator,
    #     infer_sz=config['model']['infer_shape'],
    #     tensorboard=tensorboard_cb
    # )

    callbacks = [
        # tensorboard_cb,
        map_evaluator_cb,
        # early_stop,
        reduce_on_plateau,
    ]

    ###############################
    #   Prepare fit
    ###############################
    if not dry_mode:
        callbacks.append(neptune_mon)

        with open('config.json', 'w') as f:
            json.dump(config, f, indent=4)

        sources_to_upload = [
            'yolo.py',
            '_common/backend.py',
            'config.json'
        ]

        params = {
            'base_params': str(config['model']['base_params']),
            'infer_size': "H{}xW{}".format(*config['model']['infer_shape']),
            'anchors_per_output': config['model']['anchors_per_output'],
            'anchors': str(config['model']['anchors'])
        }

        tags = [
            config['model']['base']
        ]
        logger.info('Tags: {}'.format(tags))

        neptune.create_experiment(
            name=utils.get_neptune_name(config),
            upload_stdout=False,
            upload_source_files=sources_to_upload,
            params=params,
            tags=tags
        )
    else:
        config['train']['nb_epochs'] = 10

    yolo_model.train_model.compile(loss=yolo.dummy_loss, optimizer=optimizer)
    yolo_model.train_model.fit_generator(
        generator=train_generator,
        steps_per_epoch=len(train_generator) * config['train']['train_times'],
        validation_data=valid_generator,
        validation_steps=len(valid_generator) * config['valid']['valid_times'],
        epochs=config['train']['nb_epochs'],
        verbose=1,
        callbacks=callbacks,
        workers=mp.cpu_count(),
        max_queue_size=100,
        use_multiprocessing=False
    )

    if not dry_mode:
        neptune.send_artifact(mAP_checkpoint_static_name)
        neptune.send_artifact('config.json')
    # (fragment: the opening of this checkpoint callback is missing from the source)
    monitor='val_auc', mode='max', verbose=1)  # {epoch:03d}-{auc:03f}-{val_auc:03f}
reduce_lr_loss = ReduceLROnPlateau(monitor='val_auc', factor=0.1, patience=7,
                                   verbose=1, min_delta=1e-6, mode='max')

model = create_cnn(input_y, input_x, 3)
# model.compile(loss='mean_squared_error', optimizer=optimizers.Adam(lr=2e-5), metrics=['mae'])  # RMSprop
# model = load_model('model.h5')
model.compile(loss='binary_crossentropy',
              optimizer=optimizers.Adamax(lr=2e-4),
              metrics=['AUC'])  # categorical_hinge f1_loss

model.fit(train_generator,
          epochs=100,
          validation_data=test_generator,
          callbacks=[mcp_save, reduce_lr_loss])  # validation_steps=20
"""
##Validate.
validate = df
validation_datagen = ImageDataGenerator(rescale=1./255)
validation_generator = validation_datagen.flow_from_dataframe(dataframe=validate,
                                                              directory='/tmp/TIS/',
                                                              x_col='image',
                                                              y_col='bin_score',
                        epochs=10,
                        batch_size=256,
                        verbose=0)
    return model, history


opts = {
    'RMSProp,lr=.001': optimizers.RMSprop(learning_rate=0.001),
    'RMSProp,lr=.01': optimizers.RMSprop(learning_rate=0.01),
    'RMSProp,lr=.1': optimizers.RMSprop(learning_rate=0.1),
    'Adam,lr=.001,e=1e-7': optimizers.Adam(learning_rate=0.001),
    'Adam,lr=.01,e=1e-7': optimizers.Adam(learning_rate=0.01),
    'Adam,lr=.1,e=1e-7': optimizers.Adam(learning_rate=0.1),
    'Adam,lr=.001,e=1e-4': optimizers.Adam(learning_rate=0.001, epsilon=0.0001),
    'Adamax,lr=.001': optimizers.Adamax(learning_rate=0.001),
    'Adamax,lr=.01': optimizers.Adamax(learning_rate=0.01),
    'Adamax,lr=.1': optimizers.Adamax(learning_rate=0.1),
    'SGD,lr=.001': optimizers.SGD(learning_rate=0.001),
    'SGD,lr=.01': optimizers.SGD(learning_rate=0.01),
    'SGD,lr=.1': optimizers.SGD(learning_rate=0.1)
}


def test_optimizers(opts):
    stats = {}
    result = {}
    epochs = range(1, 11)
    res_model = None
    max_acc = 0
    best_opt = None
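The body of `test_optimizers` is truncated in the source. The sketch below is a separate, hedged illustration of how a comparison loop over the `opts` dict above could work; `build_model`, `x_train`, `y_train`, `x_val`, and `y_val` are hypothetical placeholders, not names from the original code.

def compare_opts(opts, build_model, x_train, y_train, x_val, y_val):
    results = {}
    for name, optimizer in opts.items():
        model = build_model()  # fresh weights for each optimizer
        model.compile(optimizer=optimizer,
                      loss='categorical_crossentropy',
                      metrics=['accuracy'])
        history = model.fit(x_train, y_train,
                            epochs=10, batch_size=256, verbose=0,
                            validation_data=(x_val, y_val))
        results[name] = max(history.history['val_accuracy'])
    return results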
train_images = train_images.reshape((60000, 28 * 28))
train_images = train_images.astype('float32') / 255
test_images = test_images.reshape((10000, 28 * 28))
test_images = test_images.astype('float32') / 255
train_labels = to_categorical(train_labels)
test_labels = to_categorical(test_labels)

network = models.Sequential()
network.add(layers.Dense(50, activation='relu', input_shape=(28 * 28,)))
network.add(layers.Dense(10, activation='softmax'))

opt = optimizers.Adamax(learning_rate=0.01)
network.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])

history = network.fit(train_images, train_labels,
                      epochs=10, batch_size=128,
                      validation_data=(test_images, test_labels))

test_loss, test_acc = network.evaluate(test_images, test_labels)
print('test_acc: ', test_acc)

import matplotlib.pyplot as plt