def configure(Model, Loss='binary_crossentropy'):
    # Use the configured Adadelta instance; compiling with the string
    # 'adadelta' would ignore the custom lr/decay set here.
    opt = optimizers.adadelta(lr=0.01, decay=2e-4)
    Model.compile(loss=Loss, optimizer=opt, metrics=['accuracy'])
    print('\n################ The Detail of the VGG19 ###################')
    Model.summary()  # summary() prints the table itself
    time.sleep(5)
    print('\n######################################################################\n')
def DModel():
    x = l.Input(shape=(24,))
    # m = l.Dense(800, activation="relu")(x)
    # m = l.Dropout(0.5)(m)
    # m = l.Dropout(0.5)(m)
    m = l.Dense(50, activation="relu", kernel_initializer="glorot_uniform")(x)
    m = l.Dense(200, activation="relu", kernel_initializer="glorot_uniform")(m)
    m = l.Dense(200, activation="relu", kernel_initializer="glorot_uniform")(m)
    m = l.Dense(50, activation="relu", kernel_initializer="glorot_uniform")(m)
    m = l.Dense(50, activation="relu", kernel_initializer="glorot_uniform")(m)
    # m = l.Dropout(0.5)(m)
    # m = l.Dense(400, activation="relu", kernel_initializer="glorot_uniform")(m)
    # m = l.Dropout(0.5)(m)
    # m = l.Dense(800, activation="relu", kernel_initializer="glorot_uniform")(m)
    out = l.Dense(1, activation="sigmoid", kernel_initializer="glorot_uniform")(m)
    model = Model(inputs=x, outputs=out)
    # opt = Opt.Adam(lr=0.002, beta_1=0.9, beta_2=0.999, decay=0.0001)
    opt = Opt.adadelta(lr=0.1, rho=0.95, epsilon=None, decay=0.0001)
    model.compile(loss="binary_crossentropy", optimizer=opt, metrics=["accuracy"])
    return model
def create_model(input_size, output_size, n_layers, n_neurons,
                 activation_function, learning_rate, dropout_rate, optimizer):
    model = models.Sequential()
    model.add(layers.Dense(n_neurons, input_shape=(input_size,),
                           name='new_androdet_dense_1'))
    for _ in range(n_layers):
        if dropout_rate != 0.0:
            model.add(layers.Dropout(dropout_rate, noise_shape=None, seed=None))
        model.add(layers.Dense(n_neurons, activation=activation_function))
    model.add(layers.Dense(output_size, activation="sigmoid"))
    # model.summary()
    if optimizer == 'rmsprop':
        opt = optimizers.rmsprop(lr=learning_rate)
    elif optimizer == 'adam':
        opt = optimizers.adam(lr=learning_rate)
    elif optimizer == 'sgd':
        opt = optimizers.sgd(lr=learning_rate)
    elif optimizer == 'adagrad':
        opt = optimizers.adagrad(lr=learning_rate)
    elif optimizer == 'adadelta':
        opt = optimizers.adadelta(lr=learning_rate)
    elif optimizer == 'adamax':
        opt = optimizers.adamax(lr=learning_rate)
    elif optimizer == 'nadam':
        opt = optimizers.nadam(lr=learning_rate)
    model.compile(loss='binary_crossentropy', optimizer=opt,
                  metrics=["mean_squared_error"])
    return model
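# Hedged usage sketch (not from the original source): one way the create_model
# factory above might be called for the Adadelta branch. The hyperparameter
# values and the X_train / y_train arrays are hypothetical placeholders.
model = create_model(input_size=24, output_size=1, n_layers=3, n_neurons=64,
                     activation_function='relu', learning_rate=1.0,
                     dropout_rate=0.2, optimizer='adadelta')
# model.fit(X_train, y_train, epochs=20, batch_size=32, validation_split=0.1)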
def create_maxout_model(input_shape):
    """
    Create a maxout model with three convolutional maxout layers and one dense maxout layer.

    Args:
        input_shape (tuple): shape of the images to run on; i.e. (rows, cols, channels)

    Returns:
        the compiled keras model, ready to be trained.
    """
    inputs = layers.Input(shape=input_shape, name='input')
    x = layers.Dropout(0.2, name='dropout_1')(inputs)

    # First maxout layer
    x = layers.Maximum(name='maxout_1')([
        layers.Conv2D(96, (8, 8), activation='relu', padding='same',
                      name='conv_1_{}'.format(i))(x) for i in range(2)
    ])
    x = layers.MaxPool2D(name='maxpool_1')(x)
    x = layers.Dropout(0.2, name='dropout_2')(x)

    # Second maxout layer
    x = layers.Maximum(name='maxout_2')([
        layers.Conv2D(192, (8, 8), activation='relu', padding='same',
                      name='conv_2_{}'.format(i))(x) for i in range(2)
    ])
    x = layers.MaxPool2D(name='maxpool_2')(x)
    x = layers.Dropout(0.2, name='dropout_3')(x)

    # Third maxout layer
    x = layers.Maximum(name='maxout_3')([
        layers.Conv2D(192, (5, 5), activation='relu', padding='same',
                      name='conv_3_{}'.format(i))(x) for i in range(2)
    ])
    x = layers.MaxPool2D(name='maxpool_3')(x)
    x = layers.Flatten(name='flatten')(x)
    x = layers.Dropout(0.2, name='dropout_4')(x)

    # Dense maxout layer
    x = layers.Maximum(name='maxout_5')([
        layers.Dense(500, activation='relu', name='dense_1_{}'.format(i))(x)
        for i in range(5)
    ])
    x = layers.Dropout(0.2, name='dropout_5')(x)
    predictions = layers.Dense(10, activation='softmax', name='dense_2')(x)

    model = Model(inputs=inputs, outputs=predictions)
    model.compile(loss='categorical_crossentropy',
                  optimizer=optimizers.adadelta(),
                  metrics=['accuracy'])
    return model
def adadelta(lr=1.0, rho=0.95, epsilon=None, decay=0.0):
    """
    Adadelta optimizer

    :param lr: >=0, initial learning rate, defaults to 1.
        It is recommended to leave it at the default value
    :param rho: >=0, Adadelta decay factor, corresponding to fraction of
        gradient to keep at each time step
    :param epsilon: >=0, fuzz factor. If None, defaults to K.epsilon()
    :param decay: >=0, initial learning rate decay
    """
    return optimizers.adadelta(lr=lr, rho=rho, epsilon=epsilon, decay=decay)
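# Hedged usage sketch (not in the original source): how the adadelta() wrapper
# above might be wired into a compile call. `model` is a hypothetical Keras
# model assumed to be defined elsewhere.
opt = adadelta(lr=1.0, rho=0.95, epsilon=None, decay=0.0)
model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])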
def main():
    name = 'williams'
    data = np.array(pkl.load(open('../X.pkl', 'rb')))
    # labels = np.array(pkl.load(open('../y_bush_vs_others.pkl', 'rb'))).flatten()
    labels = np.array(
        pkl.load(open('../y_{0}_vs_others.pkl'.format(name), 'rb'))).flatten()
    data = data.reshape((len(data), 64, 64, 1))
    x_train, x_test, y_train, y_test = train_test_split(data, labels,
                                                        test_size=1. / 3,
                                                        random_state=2518,
                                                        stratify=labels,
                                                        shuffle=True)
    # y_train = to_categorical(y_train)
    # y_test = to_categorical(y_test)
    print(data.shape)
    print(labels.shape)
    num_positive = np.sum(labels)
    class_weight = {0: 1., 1: len(labels) / num_positive * 2}
    print(class_weight)
    model = conv_predict_model(acthidden='tanh', actoutput='sigmoid')
    opt = optimizers.adadelta()  # Revisit this
    plot_model(model, to_file='model_plot.png', show_shapes=True,
               show_layer_names=True)
    # model.compile(loss='binary_crossentropy', optimizer='adadelta', metrics=['accuracy'])
    # model.fit(x_train, y_train, validation_data=(x_test, y_test), shuffle=True,
    #           epochs=500, batch_size=16, class_weight=class_weight)
    # sequential_model_to_ascii_printout(model)
    #400
    model.save("{0}_predict.model".format(name))
    with open("{0}_history.pkl".format(name), 'wb') as file_pi:
        pkl.dump(model.history, file_pi)
def create_model(layers_and_filters, kernels, activation, input_shape,
                 dropout_rate, optimizer, learning_rate, output_size=1):
    model = models.Sequential()
    i = 0
    for filters in layers_and_filters:
        model.add(layers.Conv2D(filters,
                                kernel_size=kernels[i],
                                strides=kernels[i],
                                activation=activation,
                                input_shape=input_shape))
        i += 1
        if i < len(layers_and_filters):
            model.add(layers.MaxPooling2D(pool_size=(2, 2)))
            model.add(layers.BatchNormalization())
        if dropout_rate != 0:
            model.add(layers.Dropout(dropout_rate))
    model.add(layers.Flatten())
    model.add(layers.Dense(output_size, activation='sigmoid'))
    if optimizer == 'rmsprop':
        opt = optimizers.rmsprop(lr=learning_rate)
    elif optimizer == 'adam':
        opt = optimizers.adam(lr=learning_rate)
    elif optimizer == 'sgd':
        opt = optimizers.sgd(lr=learning_rate)
    elif optimizer == 'adagrad':
        opt = optimizers.adagrad(lr=learning_rate)
    elif optimizer == 'adadelta':
        opt = optimizers.adadelta(lr=learning_rate)
    elif optimizer == 'adamax':
        opt = optimizers.adamax(lr=learning_rate)
    elif optimizer == 'nadam':
        opt = optimizers.nadam(lr=learning_rate)
    model.compile(loss='binary_crossentropy', optimizer=opt,
                  metrics=["mean_squared_error"])
    # model.summary()
    return model
def init_model(self, model_name):
    # keras
    model = Sequential()
    model.add(Dense(hidden_size_0, input_shape=(num_actions,), activation='relu'))
    model.add(Dense(hidden_size_1, activation='relu'))
    model.add(Dense(hidden_size_0, activation='relu'))
    model.add(Dense(num_actions))  # output layer
    model.compile(optimizer=optimizers.adadelta(lr=learning_rate), loss=losses.mse)
    if os.path.isfile(temp_model(self.model_name)):
        model = load_model(temp_model(self.model_name))
        print("model_loaded")
    return model
def get_optimizer(config):
    if config['optimizer'] == 'rmsprop':
        opti = optimizers.rmsprop(lr=config['learning_rate'],
                                  clipvalue=config['grad_clip'],
                                  decay=config['decay_rate'])
        return opti
    elif config['optimizer'] == 'adadelta':
        opti = optimizers.adadelta(lr=config['learning_rate'],
                                   clipvalue=config['grad_clip'])
        return opti
    elif config['optimizer'] == 'sgd':
        opti = optimizers.sgd(lr=config['learning_rate'],
                              momentum=config['momentum'],
                              decay=config['learning_rate_decay'])
        return opti
    else:
        raise KeyError('optimizer name error')
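# Hedged usage sketch (not in the original source): a hypothetical config dict
# for the get_optimizer(config) function above. Key names follow the lookups in
# that function; the values are illustrative only.
config = {
    'optimizer': 'adadelta',
    'learning_rate': 1.0,  # Adadelta's recommended default
    'grad_clip': 5.0,      # passed through as clipvalue
}
opti = get_optimizer(config)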
def get_optimizer(name, lr):
    """
    Args:
        name: optimizer name; one of 'adam', 'rmsprop' or 'adadelta'
            (any other value falls back to Adam)
        lr: learning rate

    Returns:
        a Keras optimizer instance with gradient clipping (clipvalue=1.0)
    """
    if name.lower() == "adam":
        return optimizers.adam(lr=lr, clipvalue=1.0)
    elif name.lower() == "rmsprop":
        return optimizers.rmsprop(lr=lr, clipvalue=1.0)
    elif name.lower() == "adadelta":
        return optimizers.adadelta(lr=lr, clipvalue=1.0)
    else:
        return optimizers.adam(lr=lr, clipvalue=1.0)
def ModelConv():
    model = Sequential()
    model.add(Conv2D(32, kernel_size=(3, 3), activation='relu',
                     input_shape=input_shape))
    model.add(Conv2D(64, kernel_size=(3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(.2))
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(.5))
    model.add(Dense(10, activation='softmax'))
    model.compile(loss='categorical_crossentropy',
                  optimizer=optimizers.adadelta(),
                  metrics=['accuracy'])
    return model
def resetModel(self, model):
    with self.Trace["model/reset/optimizer"]:
        if self.OpType == "SGD":
            optimizer = optimizers.SGD(**self.OptimizerParams)
        elif self.OpType == "adadelta":
            optimizer = optimizers.adadelta(**self.OptimizerParams)
        elif self.OpType == "adagrad":
            optimizer = optimizers.adagrad(**self.OptimizerParams)
        else:
            raise ValueError("Unknown optimizer type %s" % (self.OpType,))
    # self.Job.message("========= optimizer:%s, %s\n mbsize=%d, iterations=%d" %
    #                  (optimizer, optimizer_params, self.MBSize, self.Iterations))
    with self.Trace["model/reset/compile"]:
        model.compile(optimizer=optimizer, loss=self.Loss, metrics=[self.Metric])
    with self.Trace["model/reset/set_weights"]:
        model.set_weights(self.Weights0)
    return model
def get_optimizer(self):
    '''
    This function sets the optimizer from config file
    '''
    self.optimizer = self.config.optimizer
    self.options = self.config.options
    if self.options['name'].lower() == 'adam':
        lr = self.options['lr']
        # beta_1 = self.options['beta_1']
        # beta_2 = self.options['beta_2']
        # decay = self.options['decay']
        optimizer = optimizers.adam(lr)
        # optimizer = optimizers.adam(lr, beta_1, beta_2, decay)
    elif self.options['name'].lower() == 'adadelta':
        lr = self.options['lr']
        rho = self.options['rho']
        epsilon = self.options['epsilon']
        decay = self.options['decay']
        optimizer = optimizers.adadelta(lr, rho, epsilon, decay)
    elif self.options['name'].lower() == 'sgd':
        lr = self.options['lr']
        momentum = self.options['momentum']
        decay = self.options['decay']
        nesterov = self.options['nesterov']
        optimizer = optimizers.sgd(lr, momentum, decay, nesterov)
    elif self.options['name'].lower() == 'rmsprop':
        lr = self.options['lr']
        rho = self.options['rho']
        epsilon = self.options['epsilon']
        decay = self.options['decay']
        optimizer = optimizers.rmsprop(lr, rho, epsilon, decay)
    return optimizer
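# Hedged sketch (not in the original source): a hypothetical config.options
# mapping that the adadelta branch of the get_optimizer method above would
# consume; the values are illustrative only.
options = {
    'name': 'adadelta',
    'lr': 1.0,
    'rho': 0.95,
    'epsilon': 1e-07,
    'decay': 0.0,
}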
def GetModel(mode='create', filename='none', X=None, Y=None):
    model = None
    if mode == 'create':
        model = CreateModel(X=X, Y=Y)
        print("Neural net created...")
    if mode == 'load_W':
        model = CreateModel(X=X, Y=Y)
        model.load_weights(filename)
        print("Neural net loaded...")
    if mode == 'load_model':
        model = keras_file_manager.LoadFromJSon(filename)
        print("Neural net loaded...")
    adag = adagrad()
    adad = adadelta()
    # pass the Adadelta instance so it is actually used by compile()
    model.compile(loss='binary_crossentropy', optimizer=adad, metrics=['accuracy'])
    return model
def get_compiled_model(model_name, lrn_rate, class_weights=[1.0, 1.0, 1.0],
                       checkpoint=None):
    json_path = join("weights", "keras", model_name + ".json")
    h5_path = join("weights", "keras", model_name + ".h5")
    if isfile(json_path) and isfile(h5_path):
        print("Keras model & weights found, loading...")
        with open(json_path, 'r') as file_handle:
            model = model_from_json(file_handle.read())
        if checkpoint is not None and isfile(checkpoint):
            print('LOADING CHECKPOINT')
            model.load_weights(checkpoint)
        else:
            print('LOADING START WEIGHTS')
            model.load_weights(h5_path)
        # optimizer = SGD(lr=lrn_rate, momentum=0.9, nesterov=True)
        optimizer = adadelta()
        print('COMPILING MODEL')
        model.compile(optimizer=optimizer,
                      loss=weighted_categorical_crossentropy(class_weights),
                      metrics=['accuracy'])
        return model
def initialize_optimizer(optimizer_name: str, learning_rate: float, beta1: float,
                         beta2: float, lr_decay: float, rho: float, fuzz: float,
                         momentum: float) \
        -> Union[adam, rmsprop, sgd, adagrad, adadelta, adamax]:
    """
    Initializes an optimizer based on the user's choices.

    :param optimizer_name: the optimizer's name.
        Can be one of 'adam', 'rmsprop', 'sgd', 'adagrad', 'adadelta', 'adamax'.
    :param learning_rate: the optimizer's learning_rate
    :param beta1: the optimizer's beta1
    :param beta2: the optimizer's beta2
    :param lr_decay: the optimizer's lr_decay
    :param rho: the optimizer's rho
    :param fuzz: the optimizer's fuzz
    :param momentum: the optimizer's momentum
    :return: the optimizer.
    """
    if optimizer_name == 'adam':
        return adam(lr=learning_rate, beta_1=beta1, beta_2=beta2, decay=lr_decay)
    elif optimizer_name == 'rmsprop':
        return rmsprop(lr=learning_rate, rho=rho, epsilon=fuzz)
    elif optimizer_name == 'sgd':
        return sgd(lr=learning_rate, momentum=momentum, decay=lr_decay)
    elif optimizer_name == 'adagrad':
        return adagrad(lr=learning_rate, decay=lr_decay)
    elif optimizer_name == 'adadelta':
        return adadelta(lr=learning_rate, rho=rho, decay=lr_decay)
    elif optimizer_name == 'adamax':
        return adamax(lr=learning_rate, beta_1=beta1, beta_2=beta2, decay=lr_decay)
    else:
        raise ValueError('An unexpected optimizer name has been encountered.')
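# Hedged usage sketch (not in the original source): calling initialize_optimizer
# above for the Adadelta branch. Only learning_rate, rho and lr_decay are used
# by that branch; the remaining arguments are placeholder values.
opt = initialize_optimizer(optimizer_name='adadelta', learning_rate=1.0,
                           beta1=0.9, beta2=0.999, lr_decay=0.0,
                           rho=0.95, fuzz=1e-07, momentum=0.0)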
# 4. Configure the model training process
optimizer_list = []
# plain SGD
optimizer_list.append(['SGD', optimizers.SGD()])
# momentum
optimizer_list.append(['Momentum', optimizers.SGD(momentum=0.9)])
# NAG
optimizer_list.append(['NAG', optimizers.SGD(momentum=0.9, nesterov=True)])
# Adagrad
optimizer_list.append(['Adagrad', optimizers.adagrad()])
# RMSProp
optimizer_list.append(['RMSProp', optimizers.rmsprop()])
# AdaDelta
optimizer_list.append(['AdaDelta', optimizers.adadelta()])
# Adam
optimizer_list.append(['Adam', optimizers.adam()])
# Nadam
optimizer_list.append(['Nadam', optimizers.nadam()])

score_list = []
opt_name_list = []
for optimizer_element in optimizer_list:
    model.compile(loss='binary_crossentropy',
                  optimizer=optimizer_element[1],
                  metrics=['accuracy'])
    # 5. Train the model
    model.fit(x_train, y_train, epochs=1500, batch_size=64)
def train_submodel_diff(config):
    from keras.models import Sequential
    from keras.layers import Dense, Dropout
    from keras.layers import Conv1D, MaxPooling1D, LSTM
    from keras.layers import Embedding
    from keras.callbacks import ModelCheckpoint
    from keras.optimizers import adadelta
    from ray.tune.integration.keras import TuneReporterCallback
    import utils.definition_network as dn
    import pandas as pd
    from ray import tune

    x_train, y_train, x_valid, y_valid, num_words, embedding_matrix = \
        config["exp_sets"].pp_data.load_data()
    trainable_emb = (config["exp_sets"].pp_data.use_embedding ==
                     (dn.UseEmbedding.RAND or dn.UseEmbedding.NON_STATIC))

    model = Sequential([
        Embedding(config["exp_sets"].pp_data.vocabulary_size,
                  config["exp_sets"].pp_data.embedding_size,
                  trainable=trainable_emb,
                  name=config["name"] + '_rt_emb_1'),
        Dropout(config["dropout"], name=config["name"] + '_rt_dropout_1'),
        Conv1D(filters=config["filters_by_layer"],
               kernel_size=config["kernels_size"],
               kernel_initializer='glorot_uniform',
               padding='valid',
               activation='relu',
               name=config["name"] + '_rt_conv_1'),
        MaxPooling1D(name=config["name"] + '_rt_max_pool_1'),
        LSTM(config["lstm_units"],
             kernel_initializer='glorot_uniform',
             activation='tanh',
             dropout=config["dropout_lstm"],
             recurrent_dropout=config["dropout_lstm"],
             return_sequences=True,
             name=config["name"] + '_rt_lstm_1'),
        LSTM(config["lstm_units"],
             kernel_initializer='glorot_uniform',
             activation='tanh',
             dropout=config["dropout_lstm"],
             recurrent_dropout=config["dropout_lstm"],
             return_sequences=True,
             name=config["name"] + '_rt_lstm_2'),
        LSTM(config["lstm_units"],
             kernel_initializer='glorot_uniform',
             activation='tanh',
             dropout=config["dropout_lstm"],
             recurrent_dropout=config["dropout_lstm"],
             name=config["name"] + '_rt_lstm_3'),
        Dense(3, activation='sigmoid', name=config["name"] + '_rt_dense_1')
    ])

    model.compile(loss="binary_crossentropy",
                  optimizer=adadelta(lr=config["lr"]),
                  metrics=["accuracy"])

    history = model.fit(x_train, y_train,
                        batch_size=config["batch_size"],
                        epochs=config["epochs"],
                        verbose=0,
                        validation_data=(x_valid, y_valid),
                        callbacks=[
                            TuneReporterCallback(freq="epoch"),
                            ModelCheckpoint(tune.get_trial_dir() + 'train_model.h5',
                                            monitor='val_acc',
                                            mode='max',
                                            save_best_only=True,
                                            save_weights_only=False,
                                            verbose=0)
                        ])

    hist_df = pd.DataFrame(history.history)
    with open(tune.get_trial_dir() + 'history_train_model.csv', mode='w') as file:
        hist_df.to_csv(file)
data_chosen = mnist
if data_chosen == mnist:
    train_x, train_y, test_x, test_y, class_name = mnist_reader()
else:
    train_x, train_y, test_x, test_y, class_name = cifar10_reader()

# model = test_network(input_shape=data_chosen['input_shape'])
# # f = open(MatricNum+".json", "w")
# # f.write(model.to_json())
# model.save_weights('rand_network_weights.h5')
# plot_model(model, to_file='model.png', show_shapes=True)
#
history_log = LossHistory()
csv_logger = CSVLogger('log.csv')
histories = []
for optimizer in [optimizers.adadelta()]:
    # set_random_seed(1)
    # np.random.seed(1)
    # model.load_weights('rand_network_weights.h5', by_name=True)
    model.load_weights('my_network_weights.h5', by_name=True)
    model.compile(loss='categorical_crossentropy',
                  optimizer=optimizer,
                  metrics=['accuracy'])
    history = model.fit(batch_size=32, x=train_x, y=train_y, epochs=10,
                        verbose=1, callbacks=[history_log, csv_logger])
    model.save_weights('my_network_weights.h5')
    histories.append(history)
def create_nin_model(input_shape):
    """
    Create an NIN model with three mlpconv layers and a global average pooling
    layer for the given input shape.

    Args:
        input_shape (tuple): shape of the images to run on; i.e. (rows, cols, channels)

    Returns:
        the compiled keras model, ready to be trained.
    """
    inputs = layers.Input(shape=input_shape, name='input')

    # First mlpconv layer
    x = layers.Conv2D(192, kernel_size=(5, 5), padding='same', activation='relu',
                      name='mlpconv_1_conv5x5')(inputs)
    x = layers.Conv2D(160, kernel_size=(1, 1), padding='same', activation='relu',
                      name='mlpconv_1_conv1x1_1')(x)
    x = layers.Conv2D(96, kernel_size=(1, 1), padding='same', activation='relu',
                      name='mlpconv_1_conv1x1_2')(x)
    x = layers.MaxPool2D(name='maxpool_1')(x)
    x = layers.Dropout(0.5, name='dropout_1')(x)

    # Second mlpconv layer
    x = layers.Conv2D(192, kernel_size=(5, 5), padding='same', activation='relu',
                      name='mlpconv_2_conv5x5')(x)
    x = layers.Conv2D(192, kernel_size=(1, 1), padding='same', activation='relu',
                      name='mlpconv_2_conv1x1_1')(x)
    x = layers.Conv2D(192, kernel_size=(1, 1), padding='same', activation='relu',
                      name='mlpconv_2_conv1x1_2')(x)
    x = layers.MaxPool2D(name='maxpool_2')(x)
    x = layers.Dropout(0.5, name='dropout_2')(x)

    # Third mlpconv layer
    x = layers.Conv2D(192, kernel_size=(3, 3), padding='same', activation='relu',
                      name='mlpconv_3_conv3x3')(x)
    x = layers.Conv2D(192, kernel_size=(1, 1), padding='same', activation='relu',
                      name='mlpconv_3_conv1x1_1')(x)
    x = layers.Conv2D(10, kernel_size=(1, 1), padding='same', activation='relu',
                      name='mlpconv_3_conv1x1_2')(x)
    x = layers.GlobalAveragePooling2D(name='globalavgpool')(x)
    predictions = layers.Activation('softmax', name='softmax')(x)

    model = Model(inputs=inputs, outputs=predictions)
    model.compile(loss='categorical_crossentropy',
                  optimizer=optimizers.adadelta(),
                  metrics=['accuracy'])
    return model
    numel=2 * 9900 * 95000,
    offset=2 * 9900 * 95000 * 4)
filter = getFilter(img_h=1024, img_w=1024, dim=9)

batch_size = 20
epochs = 20
lr = 1  # change to 0.05
decay = 0.0

print(colored('@--------- Parameters ---------@', 'green'))
print('batch size: ' + str(batch_size))
print('learning rate: ' + str(lr))
print('decay:' + str(decay))
# print('input vector: '+input_vector)
print(colored('@------------------------------@', 'green'))

# optimizer = optimizers.Adagrad(lr=lr, epsilon=None, decay=decay)
optimizer = optimizers.adadelta(lr=lr, rho=0.95, decay=decay)
# optimizer = optimizers.SGD(lr=lr, momentum=0.9, decay=decay, nesterov=False)
filter.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

'''
tb = callbacks.TensorBoard(
    log_dir=path['log']+'winter/',
    batch_size=batch_size,
    histogram_freq=0,
    write_graph=True,
    write_images=True)
earlystop = callbacks.EarlyStopping(
    monitor='val_loss',
    min_delta=1e-4,
timestep = 10
model = Sequential()
# model.add(Embedding(20000, 256, input_length=80))
# model.add(Convolution2D(4, 5, 5, border_mode='valid', input_shape=(3, 240, 320)))
# model.add(Flatten())
# the first LSTM must return the full sequence so the stacked LSTM below
# receives 3D input
model.add(LSTM(output_dim=512, return_sequences=True,
               input_shape=(timestep, dim_feature)))
model.add(Dropout(0.2))
model.add(LSTM(512, return_sequences=False))
model.add(Dropout(0.2))
# model.add(Dense(1024, activation='relu'))
# model.add(Dropout(0.5))
# model.add(Dense(512, activation='relu'))
# model.add(Dropout(0.5))
model.add(Dense(51))
model.add(Activation('softmax'))
# model.load_weights('my_weights/ucf101_train_lstm1000_001_19.h5')
for layer in model.layers:
    layer.trainable = True
Rmsprop = rmsprop(lr=0.001)
Adadelta = adadelta(lr=0.001)
model.compile(loss='categorical_crossentropy',  # 'binary_crossentropy'
              optimizer=Rmsprop,
              metrics=['accuracy'])
def main(learning_rate, batch_size, epochs, n_samples, validation_split): train_data = json.load(open("data/train-v1.1.json"))['data'] samples = get_train_samples(train_data)[:n_samples] print('Training samples: %d' % len(samples)) assert embedding_dim in [50, 100, 200, 300] glove_path = 'glove/glove.6B.%dd.txt' % embedding_dim print('Loading glove model') glove_model = load_glove_model(glove_path) print('Done loading glove model') contexts, questions, answers = zip(*samples) # Scan every word in the questions and contexts and index them tokenizer = Tokenizer() tokenizer.fit_on_texts(contexts + questions) word_index = tokenizer.word_index print('Done fitting tokenizer on texts, found %d unique tokens' % len(word_index)) print("Tokenizing questions and contexts") context_seqs = tokenizer.texts_to_sequences(contexts) question_seqs = tokenizer.texts_to_sequences(questions) # find start and end location in tokenized representation answers_str = [context[s:e] for context, (s, e) in zip(contexts, answers)] answer_seqs = tokenizer.texts_to_sequences(answers_str) # Pad the question- and context sequences to the same length context_seqs_padded = pad_sequences(context_seqs, maxlen=max_context_seq_length, padding='post') question_seqs_padded = pad_sequences(question_seqs, maxlen=max_question_seq_length, padding='post') print('Longest sequences:\n context: %d \n question: %d' % (max([len(s) for s in context_seqs]), max([len(s) for s in question_seqs]))) c_proportion = float( len([c for c in context_seqs if len(c) <= max_context_seq_length ])) / len(context_seqs) q_proportion = float( len([q for q in question_seqs if len(q) <= max_question_seq_length ])) / len(question_seqs) print( 'Proportion of contexts smaller or equal to %d: %f\nProportion questions smaller or equal to %d: %f' % (max_context_seq_length, c_proportion, max_question_seq_length, q_proportion)) print("Locating answer indexes in padded context sequences") ans_in_context = [ find_in_padded_seq(np.asarray(answer_seq), context_seq) for answer_seq, context_seq in zip(answer_seqs, context_seqs_padded) ] start, end = zip(*ans_in_context) # remove questions, contexts, answer triplets that have no located answer in our tokenized sequence representation to_remove = [i for i, s in enumerate(start) if s == -1] print('Removing %d samples' % len(to_remove)) context_seqs_padded = np.delete(context_seqs_padded, to_remove, axis=0) question_seqs_padded = np.delete(question_seqs_padded, to_remove, axis=0) start = np.delete(start, to_remove) end = np.delete(end, to_remove) # categorical labels of floats a_s_y = to_categorical(np.asarray(start, dtype='float32'), num_classes=max_context_seq_length) a_e_y = to_categorical(np.asarray(end, dtype='float32'), num_classes=max_context_seq_length) print(context_seqs_padded.shape, question_seqs_padded.shape, a_s_y.shape, a_e_y.shape) embedding_matrix = get_embedding_matrix(word_index, glove_model) print(embedding_matrix.shape) model = get_model(embedding_matrix, name='train') optimizer = optimizers.adadelta(lr=learning_rate) model.compile(optimizer=optimizer, loss='categorical_crossentropy') model.summary() callback = ModelCheckpoint( 'weights/weights.{epoch:02d}--{loss:.2f}--{val_loss:.2f}.h5', monitor='val_loss', save_weights_only=True) history = model.fit([context_seqs_padded, question_seqs_padded], [a_s_y, a_e_y], epochs=epochs, batch_size=batch_size, validation_split=validation_split, callbacks=[callback]) # model.save_weights('simple_bidaf_%d_epochs.h5' % epochs) plot_history(history)
def run(): np.random.seed(1337) # maxlen = 66 # Convolution filter_length = 5 nb_filter = 50 pool_length = 4 # LSTM lstm_output_size = 200 # Training batch_size = 30 nb_epoch = 10 print('Loading data...') import json ctdPath ='test.json' indexJson = open(ctdPath, "r") inputInfo = json.load(indexJson) indexJson.close() dictPath =inputInfo["ctdEm"] dataPath =inputInfo["mrPath"] (X_train, y_train), (X_test, y_test), WordEm = loadData(path=dataPath) print('datapath:',dataPath) print(len(X_train), 'train sequences') print(len(X_test), 'test sequences') print('Pad sequences (samples x time)') # X_train = sequence.pad_sequences(X_train, maxlen=maxlen) # X_test = sequence.pad_sequences(X_test, maxlen=maxlen) print('X_train shape:', X_train.shape) print('X_test shape:', X_test.shape) train_label = to_categorical(y_train, 2) test_label = to_categorical(y_test, 2) print('set hyper-parameters:') max_features = (WordEm.shape[0]) embedding_size = WordEm.shape[1] print('load ctd features...') import ctdFeatureData ctdWord = np.loadtxt(dictPath, delimiter=' ', dtype='float32') train,test = ctdFeatureData.makeidx_map(ctdPath) train = np.asarray(train, dtype='int32') test = np.asarray(test, dtype='int32') print('Build model...') maxlen =X_train.shape[1] def buildModel(): from keras.regularizers import l2 print('xxx') main_inputs = Input(shape=(maxlen,), dtype='int32', name='main_input') inputs = Embedding(max_features, embedding_size, input_length=maxlen, weights=[WordEm])(main_inputs) # x =Dropout(0.25)(inputs) convs = [] filter_sizes = (2, 3, 4) for fsz in filter_sizes: conv = Convolution1D(nb_filter=nb_filter, filter_length=fsz, border_mode='valid', activation='relu', subsample_length=1, W_regularizer=l2(l=0.01) )(inputs) pool = MaxPooling1D(pool_length=2)(conv) flatten = Flatten()(pool) convs.append(flatten) out = Merge(mode='concat',concat_axis=1)(convs) # out =GlobalMaxPooling1D()(convs) out =BatchNormalization()(out) # out =LSTM(lstm_output_size,activation='relu')(out) predict = Dense(2, activation='softmax',W_regularizer=l2(0.01))(out) model = Model(input=main_inputs, output=predict) return model def buildBiLstm(): main_inputs = Input(shape=(maxlen,), dtype='int32', name='main_input') inputs = Embedding(max_features, embedding_size, input_length=maxlen, weights=[WordEm])(main_inputs) lstm1 = LSTM(100)(inputs) lstm2 = LSTM(200)(inputs) lstm1_back = LSTM(100, go_backwards=True)(inputs) # lstm2_back =LSTM(200,go_backwards=True)(inputs) out = merge([lstm1, lstm2, lstm1_back], mode='concat') out = Dense(200, activation='tanh')(out) predict = Dense(2, activation='softmax')(out) model = Model(input=main_inputs, output=predict) return model def buildCNNwithCTD(): nb_filter = 50 filter_sizes = (2, 3, 4) convs = [] main_inputs = Input(shape=(maxlen,), dtype='int32', name='main_input') inputs = Embedding(max_features, embedding_size, input_length=maxlen, weights=[WordEm])(main_inputs) for fsz in filter_sizes: conv = Convolution1D(nb_filter=nb_filter, filter_length=fsz, border_mode='valid', activation='relu', subsample_length=1)(inputs) pool = MaxPooling1D(pool_length=2)(conv) flatten = Flatten()(pool) convs.append(flatten) if len(filter_sizes) > 1: out = Merge(mode='concat')(convs) else: out = convs[0] ctdinput = Input(shape=(1,), dtype='int32', name='ctd_input') # ctdword = Embedding(4, 10, input_length=1, weights=[ctdWord])(ctdinput) ctdword = Embedding(4, 50, input_length=1)(ctdinput) ctdword = Dense(10)(ctdword) ctdf = Flatten()(ctdword) print(ctdWord.shape) outs = merge([out, ctdf], mode='concat') predict 
= Dense(2, activation='softmax')(outs) model = Model(input=[main_inputs, ctdinput], output=predict) return model def attLstm(): from keras.regularizers import l2 main_inputs = Input(shape=(maxlen,), dtype='int32', name='main_input') inputs = Embedding(max_features, embedding_size, input_length=maxlen, weights=[WordEm])(main_inputs) lstm1 = AttentionLSTM_t(100,W_regularizer=l2(0.01))(inputs) lstm1_back = AttentionLSTM_t(100, go_backwards=True)(inputs) out = merge([lstm1, lstm1_back], mode='concat') out = Dense(100, activation='tanh')(out) predict = Dense(2, activation='softmax')(out) model = Model(input=main_inputs, output=predict) return model # model =buildCNNwithCTD() model = buildModel() print('xxxxxx') pltname = 'modelcnn-ctd.png' savePath = 'result_ctd_score.txt' # savePath = 'result_ctd_crossSen.txt' def precision(y_true, y_pred): true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1))) predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1))) precision = true_positives / (predicted_positives + K.epsilon()) return precision from keras.optimizers import adadelta ss = adadelta(clipnorm=0.5) model.compile(loss='categorical_crossentropy', optimizer='adagrad', metrics=[precision, 'fbeta_score']) model.summary() from keras.utils.visualize_util import plot plot(model, to_file=pltname) print('Train...') def trainCTDModel(): model.fit([X_train, train], train_label, batch_size=batch_size, nb_epoch=2) score = model.evaluate([X_test, test], test_label, batch_size=batch_size) result = model.predict([X_test, test]) print(len(score)) for i in range(len(score)): print(score[i]) # result = model.predict([X_test,test]) np.savetxt(savePath, result, fmt="%.4f", delimiter=" ") def trainModel(): for i in range(3): model.fit([X_train], train_label, batch_size=batch_size, nb_epoch=1,validation_split=0.2,shuffle=True) score = model.evaluate([X_test], test_label, batch_size=batch_size) result = model.predict([X_test]) # print(len(score)) # for i in range(len(score)): # print('xxxx...',score[i]) np.savetxt('result_'+str(i)+'.txt', result, fmt="%.4f", delimiter=" ") trainModel()
def compile(self):
    self.model.compile(loss=losses.categorical_crossentropy,
                       optimizer=optimizers.adadelta(),
                       metrics=['accuracy'])
os.environ["CUDA_VISIBLE_DEVICES"] = "2, 3" input_shape = (224, 224, 3) num_classes = 10 vgg16_net = VGG_16(input_shape=input_shape, num_classes=num_classes) parallel_model = multi_gpu_model(vgg16_net, gpus=2) epochs = 200 model_name = "VGG16-1" train_dir = r'/home/lst/datasets/cifar-10-images_train/' test_dir = r'/home/lst/datasets/cifar-10-images_test/' batch_size = 64 target_weight_height = (224, 224) adadelta = optimizers.adadelta(lr=1.0, rho=0.95, epsilon=1e-08, decay=0.0) parallel_model.compile(loss=['categorical_crossentropy'], optimizer=adadelta, metrics=["accuracy"]) # callbacks tensorboard = TensorBoard(log_dir=f'./logs/{model_name}', histogram_freq=0, write_graph=True, write_images=False) early_stopping = EarlyStopping(monitor='val_loss', patience=30, verbose=1) mc = ModelCheckpoint(f"{model_name}.h5", monitor='vac_acc', mode="max", verbose=1, save_best_only=True) cb_list = [tensorboard, early_stopping, mc]
from keras.layers import Conv2D, MaxPool2D, Dense, Flatten, Dropout

input_tensor = Input((80, 170, 3))
x = input_tensor
for i in range(4):
    x = Conv2D(filters=32 * 2 ** i, kernel_size=(3, 3), activation='relu')(x)
    x = Conv2D(filters=32 * 2 ** i, kernel_size=(3, 3), activation='relu')(x)
    x = MaxPool2D((2, 2))(x)
x = Flatten()(x)
x = Dropout(0.25)(x)
x = [Dense(36, activation='softmax', name='c%d' % (i + 1))(x) for i in range(4)]
model = Model(inputs=input_tensor, outputs=x)
model.compile(loss='categorical_crossentropy',
              optimizer=optimizers.adadelta(),
              metrics=['accuracy'])

# Plot the model architecture to an image file
from keras.utils.vis_utils import plot_model
plot_model(model, to_file='CNN-model.png', show_shapes=True, show_layer_names=False)

num_epochs = 10
for epoch in range(num_epochs):
    start = time.time()
    history = model.fit(train_imgs, train_labels, batch_size=256,
                        shuffle=True, verbose=0)
    score = model.evaluate(test_imgs, test_labels, verbose=0)
    loss = history.history['loss']
    train_acc = history.history['c1_acc'][0] * history.history['c2_acc'][0] * \
                history.history['c3_acc'][0] * history.history['c4_acc'][0]
    test_acc = score[5] * score[6] * score[7] * score[8]
def create_model(activation, optimizer, learning_rate, output_size, merged_layers):
    original_new_androdet_model = models.load_model("../new_androdet/model_trained.k")
    original_cnn_model = models.load_model("../cnn/model_trained.k")
    original_dnn_model = models.load_model("../bow/model_trained.k")
    new_androdet_model = models.Sequential()
    cnn_model = models.Sequential()
    dnn_model = models.Sequential()
    for layer in original_new_androdet_model.layers[:-1]:
        layer.name = 'new_androdet_' + layer.name
        layer.trainable = False
        new_androdet_model.add(layer)
    for layer in original_cnn_model.layers[:-1]:
        layer.name = 'cnn_' + layer.name
        layer.trainable = False
        cnn_model.add(layer)
    for layer in original_dnn_model.layers[:-1]:
        layer.name = 'dnn_' + layer.name
        layer.trainable = False
        dnn_model.add(layer)
    entropy_input_layer = layers.Input(shape=(1,), name='entropy_input')
    merge_layer = layers.concatenate([
        cnn_model.layers[-1].get_output_at(-1),
        dnn_model.layers[-1].get_output_at(-1),
        entropy_input_layer
    ])
    for (i, n_neurons) in enumerate(merged_layers):
        merge_layer = layers.Dense(n_neurons, activation=activation,
                                   name='dense{}'.format(i))(merge_layer)
    output_trivial = layers.concatenate(
        [merge_layer, new_androdet_model.layers[-1].get_output_at(-1)])
    output_trivial = layers.Dense(1, activation='sigmoid')(output_trivial)
    output_rest = layers.Dense(output_size - 1, activation='sigmoid')(merge_layer)
    output_all = layers.concatenate([output_trivial, output_rest])
    model = models.Model(inputs=[
        new_androdet_model.layers[0].get_input_at(-1),
        cnn_model.layers[0].get_input_at(-1),
        dnn_model.layers[0].get_input_at(-1),
        entropy_input_layer
    ], outputs=output_all)
    if optimizer == 'rmsprop':
        opt = optimizers.rmsprop(lr=learning_rate)
    elif optimizer == 'adam':
        opt = optimizers.adam(lr=learning_rate)
    elif optimizer == 'sgd':
        opt = optimizers.sgd(lr=learning_rate)
    elif optimizer == 'adagrad':
        opt = optimizers.adagrad(lr=learning_rate)
    elif optimizer == 'adadelta':
        opt = optimizers.adadelta(lr=learning_rate)
    elif optimizer == 'adamax':
        opt = optimizers.adamax(lr=learning_rate)
    elif optimizer == 'nadam':
        opt = optimizers.nadam(lr=learning_rate)
    model.compile(loss='binary_crossentropy', optimizer=opt,
                  metrics=["mean_squared_error"])
    model.summary()
    return model
def train_kitti(): # config for data argument cfg = config.Config() cfg.use_horizontal_flips = True cfg.use_vertical_flips = True cfg.rot_90 = True cfg.num_rois = 32 cfg.base_net_weights = os.path.join('./model/', nn.get_weight_path()) # cfg.base_net_weights=r'' # TODO: the only file should to be change for other data to train cfg.model_path = '/media/private/Ci/log/plane/frcnn/vgg-adam' now = datetime.datetime.now() day = now.strftime('%y-%m-%d') for i in range(10000): if not os.path.exists('%s-%s-%d' % (cfg.model_path, day, i)): cfg.model_path = '%s-%s-%d' % (cfg.model_path, day, i) break make_dir(cfg.model_path) make_dir(cfg.model_path + '/loss') make_dir(cfg.model_path + '/loss_rpn_cls') make_dir(cfg.model_path + '/loss_rpn_regr') make_dir(cfg.model_path + '/loss_class_cls') make_dir(cfg.model_path + '/loss_class_regr') cfg.simple_label_file = '/media/public/GEOWAY/plane/plane0817.csv' all_images, classes_count, class_mapping = get_data(cfg.simple_label_file) if 'bg' not in classes_count: classes_count['bg'] = 0 class_mapping['bg'] = len(class_mapping) cfg.class_mapping = class_mapping cfg.config_save_file = os.path.join(cfg.model_path, 'config.pickle') with open(cfg.config_save_file, 'wb') as config_f: pickle.dump(cfg, config_f) print( 'Config has been written to {}, and can be loaded when testing to ensure correct results' .format(cfg.config_save_file)) inv_map = {v: k for k, v in class_mapping.items()} print('Training images per class:') pprint.pprint(classes_count) print('Num classes (including bg) = {}'.format(len(classes_count))) random.shuffle(all_images) num_imgs = len(all_images) train_imgs = [s for s in all_images if s['imageset'] == 'trainval'] val_imgs = [s for s in all_images if s['imageset'] == 'test'] print('Num train samples {}'.format(len(train_imgs))) print('Num val samples {}'.format(len(val_imgs))) data_gen_train = data_generators.get_anchor_gt(train_imgs, classes_count, cfg, nn.get_img_output_length, K.image_dim_ordering(), mode='train') data_gen_val = data_generators.get_anchor_gt(val_imgs, classes_count, cfg, nn.get_img_output_length, K.image_dim_ordering(), mode='val') Q = multiprocessing.Manager().Queue(maxsize=30) def fill_Q(n): while True: if not Q.full(): Q.put(next(data_gen_train)) #print(Q.qsize(),'put',n) else: time.sleep(0.00001) threads = [] for i in range(4): thread = multiprocessing.Process(target=fill_Q, args=(i, )) threads.append(thread) thread.start() if K.image_dim_ordering() == 'th': input_shape_img = (3, None, None) else: input_shape_img = (None, None, 3) img_input = Input(shape=input_shape_img) roi_input = Input(shape=(None, 4)) # define the base network (resnet here, can be VGG, Inception, etc) shared_layers = nn.nn_base(img_input, trainable=True) # define the RPN, built on the base layers num_anchors = len(cfg.anchor_box_scales) * len(cfg.anchor_box_ratios) rpn = nn.rpn(shared_layers, num_anchors) classifier = nn.classifier(shared_layers, roi_input, cfg.num_rois, nb_classes=len(classes_count), trainable=True) model_rpn = Model(img_input, rpn[:2]) model_classifier = Model([img_input, roi_input], classifier) # this is a model that holds both the RPN and the classifier, used to load/save weights for the models model_all = Model([img_input, roi_input], rpn[:2] + classifier) # model_all.summary() from keras.utils import plot_model # os.environ['PATH'] = os.environ['PATH'] + r';C:\Program Files (x86)\Graphviz2.38\bin;' plot_model(model_all, 'model_all.png', show_layer_names=True, show_shapes=True) plot_model(model_classifier, 
'model_classifier.png', show_layer_names=True, show_shapes=True) plot_model(model_rpn, 'model_rpn.png', show_layer_names=True, show_shapes=True) ''' try: print('loading weights from {}'.format(cfg.base_net_weights)) model_rpn.load_weights(cfg.model_path, by_name=True) model_classifier.load_weights(cfg.model_path, by_name=True) except Exception as e: print(e) print('Could not load pretrained model weights. Weights can be found in the keras application folder ' 'https://github.com/fchollet/keras/tree/master/keras/applications') ''' optimizer = adadelta() optimizer_classifier = adadelta() model_rpn.compile(optimizer=optimizer, loss=[ losses_fn.rpn_loss_cls(num_anchors), losses_fn.rpn_loss_regr(num_anchors) ]) model_classifier.compile( optimizer=optimizer_classifier, loss=[ losses_fn.class_loss_cls, losses_fn.class_loss_regr(len(classes_count) - 1) ], metrics={'dense_class_{}'.format(len(classes_count)): 'accuracy'}) model_all.compile(optimizer='sgd', loss='mae') epoch_length = 10 num_epochs = int(cfg.num_epochs) iter_num = 0 losses = np.zeros((epoch_length, 5)) rpn_accuracy_rpn_monitor = [] rpn_accuracy_for_epoch = [] start_time = time.time() best_loss = np.Inf best_rpn_cls = np.Inf best_rpn_regr = np.Inf best_class_cls = np.Inf best_class_regr = np.Inf class_mapping_inv = {v: k for k, v in class_mapping.items()} print('Starting training') vis = True for epoch_num in range(num_epochs): progbar = generic_utils.Progbar(epoch_length) print('Epoch {}/{}'.format(epoch_num + 1, num_epochs)) while True: try: if len(rpn_accuracy_rpn_monitor ) == epoch_length and cfg.verbose: mean_overlapping_bboxes = float( sum(rpn_accuracy_rpn_monitor)) / len( rpn_accuracy_rpn_monitor) rpn_accuracy_rpn_monitor = [] print( 'Average number of overlapping bounding boxes from RPN = {} for {} previous iterations' .format(mean_overlapping_bboxes, epoch_length)) if mean_overlapping_bboxes == 0: print( 'RPN is not producing bounding boxes that overlap' ' the ground truth boxes. Check RPN settings or keep training.' 
) # X, Y, img_data = next(data_gen_train) while True: if Q.empty(): time.sleep(0.00001) continue X, Y, img_data = Q.get() # print(Q.qsize(),'get') break # print(X.shape,Y.shape) loss_rpn = model_rpn.train_on_batch(X, Y) P_rpn = model_rpn.predict_on_batch(X) result = roi_helpers.rpn_to_roi(P_rpn[0], P_rpn[1], cfg, K.image_dim_ordering(), use_regr=True, overlap_thresh=0.7, max_boxes=300) # note: calc_iou converts from (x1,y1,x2,y2) to (x,y,w,h) format X2, Y1, Y2, IouS = roi_helpers.calc_iou( result, img_data, cfg, class_mapping) if X2 is None: rpn_accuracy_rpn_monitor.append(0) rpn_accuracy_for_epoch.append(0) continue neg_samples = np.where(Y1[0, :, -1] == 1) pos_samples = np.where(Y1[0, :, -1] == 0) if len(neg_samples) > 0: neg_samples = neg_samples[0] else: neg_samples = [] if len(pos_samples) > 0: pos_samples = pos_samples[0] else: pos_samples = [] rpn_accuracy_rpn_monitor.append(len(pos_samples)) rpn_accuracy_for_epoch.append((len(pos_samples))) if cfg.num_rois > 1: if len(pos_samples) < cfg.num_rois // 2: selected_pos_samples = pos_samples.tolist() else: selected_pos_samples = np.random.choice( pos_samples, cfg.num_rois // 2, replace=False).tolist() try: selected_neg_samples = np.random.choice( neg_samples, cfg.num_rois - len(selected_pos_samples), replace=False).tolist() except: selected_neg_samples = np.random.choice( neg_samples, cfg.num_rois - len(selected_pos_samples), replace=True).tolist() sel_samples = selected_pos_samples + selected_neg_samples else: # in the extreme case where num_rois = 1, we pick a random pos or neg sample selected_pos_samples = pos_samples.tolist() selected_neg_samples = neg_samples.tolist() if np.random.randint(0, 2): sel_samples = random.choice(neg_samples) else: sel_samples = random.choice(pos_samples) loss_class = model_classifier.train_on_batch( [X, X2[:, sel_samples, :]], [Y1[:, sel_samples, :], Y2[:, sel_samples, :]]) losses[iter_num, 0] = loss_rpn[1] losses[iter_num, 1] = loss_rpn[2] losses[iter_num, 2] = loss_class[1] losses[iter_num, 3] = loss_class[2] losses[iter_num, 4] = loss_class[3] iter_num += 1 progbar.update( iter_num, [('rpn_cls', np.mean(losses[:iter_num, 0])), ('rpn_regr', np.mean(losses[:iter_num, 1])), ('detector_cls', np.mean(losses[:iter_num, 2])), ('detector_regr', np.mean(losses[:iter_num, 3]))]) if iter_num == epoch_length: loss_rpn_cls = np.mean(losses[:, 0]) loss_rpn_regr = np.mean(losses[:, 1]) loss_class_cls = np.mean(losses[:, 2]) loss_class_regr = np.mean(losses[:, 3]) class_acc = np.mean(losses[:, 4]) mean_overlapping_bboxes = float(sum( rpn_accuracy_for_epoch)) / len(rpn_accuracy_for_epoch) rpn_accuracy_for_epoch = [] if cfg.verbose: print( 'Mean number of bounding boxes from RPN overlapping ground truth boxes: {}' .format(mean_overlapping_bboxes)) print( 'Classifier accuracy for bounding boxes from RPN: {}' .format(class_acc)) print('Loss RPN classifier: {}'.format(loss_rpn_cls)) print('Loss RPN regression: {}'.format(loss_rpn_regr)) print('Loss Detector classifier: {}'.format( loss_class_cls)) print('Loss Detector regression: {}'.format( loss_class_regr)) print('Elapsed time: {}'.format(time.time() - start_time)) curr_loss = loss_rpn_cls + loss_rpn_regr + loss_class_cls + loss_class_regr iter_num = 0 start_time = time.time() if curr_loss < best_loss: if cfg.verbose: print( 'Total loss decreased from {} to {}, saving weights' .format(best_loss, curr_loss)) best_loss = curr_loss model_all.save_weights( '%s/%s/E-%d-loss-%.4f-rpnc-%.4f-rpnr-%.4f-cls-%.4f-cr-%.4f.hdf5' % (cfg.model_path, 'loss', epoch_num, curr_loss, 
loss_rpn_cls, loss_rpn_regr, loss_class_cls, loss_class_regr)) if loss_rpn_cls < best_rpn_cls: if cfg.verbose: print( 'loss_rpn_cls decreased from {} to {}, saving weights' .format(best_rpn_cls, loss_rpn_cls)) best_rpn_cls = loss_rpn_cls model_all.save_weights( '%s/%s/E-%d-loss-%.4f-rpnc-%.4f-rpnr-%.4f-cls-%.4f-cr-%.4f.hdf5' % (cfg.model_path, 'loss_rpn_cls', epoch_num, curr_loss, loss_rpn_cls, loss_rpn_regr, loss_class_cls, loss_class_regr)) if loss_rpn_regr < best_rpn_regr: if cfg.verbose: print( 'loss_rpn_regr decreased from {} to {}, saving weights' .format(best_rpn_regr, loss_rpn_regr)) best_rpn_regr = loss_rpn_regr model_all.save_weights( '%s/%s/E-%d-loss-%.4f-rpnc-%.4f-rpnr-%.4f-cls-%.4f-cr-%.4f.hdf5' % (cfg.model_path, 'loss_rpn_regr', epoch_num, curr_loss, loss_rpn_cls, loss_rpn_regr, loss_class_cls, loss_class_regr)) if loss_class_cls < best_class_cls: if cfg.verbose: print( 'loss_class_cls decreased from {} to {}, saving weights' .format(best_loss, loss_class_cls)) best_class_cls = loss_class_cls model_all.save_weights( '%s/%s/E-%d-loss-%.4f-rpnc-%.4f-rpnr-%.4f-cls-%.4f-cr-%.4f.hdf5' % (cfg.model_path, 'loss_class_cls', epoch_num, curr_loss, loss_rpn_cls, loss_rpn_regr, loss_class_cls, loss_class_regr)) if loss_class_regr < best_class_regr: if cfg.verbose: print( 'loss_class_regr decreased from {} to {}, saving weights' .format(best_loss, loss_class_regr)) best_class_regr = loss_class_regr model_all.save_weights( '%s/%s/E-%d-loss-%.4f-rpnc-%.4f-rpnr-%.4f-cls-%.4f-cr-%.4f.hdf5' % (cfg.model_path, 'loss_class_regr', epoch_num, curr_loss, loss_rpn_cls, loss_rpn_regr, loss_class_cls, loss_class_regr)) break except Exception as e: # print('Exception: {}'.format(e)) # save model # model_all.save_weights(cfg.model_path) continue print('Training complete, exiting.')
def Model5(input_tensor=None, train=False, re_train=False, x_train_more=[],
           y_train_more=[], retrain_num=0):
    nb_classes = 10
    # convolution kernel size
    kernel_size = (5, 5)
    nb_epoch = 1
    if train:
        batch_size = 256
        # input image dimensions
        img_rows, img_cols = 28, 28
        # the data, shuffled and split between train and test sets
        (x_train, y_train), (x_test, y_test) = mnist.load_data()
        x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
        x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
        input_shape = (img_rows, img_cols, 1)
        x_train = x_train.astype('float32')
        x_test = x_test.astype('float32')
        x_train /= 255
        x_test /= 255
        if re_train:
            x_train = np.append(x_train, x_train_more, axis=0)
            y_train = np.append(y_train, y_train_more, axis=0)
        # convert class vectors to binary class matrices
        y_train = to_categorical(y_train, nb_classes)
        y_test = to_categorical(y_test, nb_classes)
        input_tensor = Input(shape=input_shape)
    elif input_tensor is None:
        print(bcolors.FAIL + 'you have to provide input_tensor when testing')
        exit()

    # block1
    x = Convolution2D(6, kernel_size, activation='relu', padding='same',
                      name='block1_conv1')(input_tensor)
    x = MaxPooling2D(pool_size=(2, 2), name='block1_pool1')(x)
    # block2
    x = Convolution2D(16, kernel_size, activation='relu', padding='same',
                      name='block2_conv1')(x)
    x = MaxPooling2D(pool_size=(2, 2), name='block2_pool1')(x)

    x = Flatten(name='flatten')(x)
    x = Dense(120, activation='relu', name='fc1')(x)
    x = Dense(84, activation='relu', name='fc2')(x)
    x = Dense(100, activation='relu', name='fc3')(x)
    x = Dense(100, activation='relu', name='fc4')(x)
    x = Dense(100, activation='relu', name='fc5')(x)
    x = Dense(100, activation='relu', name='fc6')(x)
    x = Dense(100, activation='relu', name='fc7')(x)
    x = Dense(100, activation='relu', name='fc8')(x)
    x = Dense(132, activation='relu', name='fc9')(x)
    x = Dense(nb_classes, name='before_softmax')(x)
    x = Activation('softmax', name='predictions')(x)
    model = Model(input_tensor, x)

    if train:
        # use the configured Adadelta instance; compiling with the string
        # 'adadelta' would ignore the lr=0.1 set here
        optim = optimizers.adadelta(lr=0.1)
        model.compile(loss='categorical_crossentropy',
                      optimizer=optim,
                      metrics=['accuracy'])
        if re_train:
            model.load_weights('./Model5_' + str(retrain_num) + '.h5')
        # training
        model.fit(x_train, y_train,
                  validation_data=(x_test, y_test),
                  batch_size=batch_size,
                  epochs=nb_epoch,
                  verbose=1)
        if re_train:
            model.save_weights('./Model5_' + str(retrain_num + 1) + '.h5')
        else:
            model.save_weights('./Model5.h5')
        score = model.evaluate(x_test, y_test, verbose=0)
        print('\n')
        print('Overall Test score:', score[0])
        print('Overall Test accuracy:', score[1])
        # plot_loss_curve(model)
        # plot_acc_curve(model)
        return score[1]
    else:
        model.load_weights('./Model5' + '_' + str(retrain_num) + '.h5')
        print(bcolors.OKBLUE + 'Model5' + '_' + str(retrain_num) + ' loaded' + bcolors.ENDC)
        return model
        win, 200, input_shape=(len(word_dict), 150, 200)))
    seq.add(Activation('tanh'))
    seq.add(MaxPooling2D(pool_size=(150 - win + 1, 1)))
    seq.add(Flatten())
    # seq.add(Dropout(dropout_rate))
    sequence_list.append(seq)

print("build the model")
model = Sequential()
model.add(Merge(sequence_list, mode='concat'))
model.add(Dense(5, W_constraint=maxnorm(max_norm)))
model.add(Activation('softmax'))
optimizer = adadelta()
model.compile(loss='categorical_crossentropy', optimizer=optimizer)

# train the model
# test_label = np_utils.to_categorical(test_label, 5)
# train = train[0:100]
# train_label = train_label[0:100]
#
# test = test[0:100]
# test_label = test_label[0:100]
train_label_cat = np_utils.to_categorical(train_label, 5)
print(train_label_cat.shape)
test_backup = copy.deepcopy(test_label)
test_label_cat = np_utils.to_categorical(test_label, 5)
def create_optimizer_instance(self, **d):
    return optimizers.adadelta(**d)