G2 = data_loader(dir_img=val_images_path, dir_seg=val_segs_path, dir_softmax=val_softmax_path, batch_size=batch_sz, h=h_img, w=w_img, num_classes=num_cl, resize=rsize) print("FIT_1_LOSS_CATEGORICAL_CROSSENTROPY") cb_tensorBoard = TensorBoard(log_dir=pathTBoard, histogram_freq=0, write_graph=True, write_grads=False, write_images=True, embeddings_freq=0, embeddings_layer_names=None, embeddings_metadata=None, embeddings_data=None, update_freq='epoch') cb_earlyStopping = EarlyStopping(monitor='val_categorical_accuracy', patience=10, min_delta=0, restore_best_weights=True) cb_modelCheckPoint = ModelCheckpoint(filepath=pathTChPoints + 'checkpoint-{epoch:02d}-{val_loss:.2f}.hdf5', monitor='val_loss', save_best_only=False, save_weights_only=True, mode='auto', period=2) logLr = pathTBoard + "/logLR" if not os.path.isdir(logLr): os.mkdir(logLr) cb_tensorBoardLR = TensorboardLR(log_dir=logLr) cb_mIou = CallbackmIoU(path, lr=lr_p, pathRGB=val_images_path, pathSEG=val_segs_path, pathSoft=val_softmax_path, pathGraphs=pathTBoard, lr_base_on_epochs=baseOnEpochs, max_iter=maxIter, poly_lr=poly_lr) print("FIT_1_LOSS_CATEGORICAL_CROSSENTROPY") deeplab_model.compile(optimizer=optimizers.SGD(lr=lr_p, momentum=0.9, decay=0, nesterov=True),
def train_nn(X, y, hidden_layers=(128,), activation='relu', dropout=0.4,
             epochs=20, batch_size=32, validation_split=None,
             validation_data=None, patience=4, shuffle=True,
             optimizer='adam', pt='model.h5'):
    """Train a fully-connected classification neural net.

    Args:
        X (array-like): 1d or 2d array of features. A 1d array is treated as
            a single feature column.
        y (array-like): 1d array of labels. For more than two classes the
            labels must be integers in ``[0, n_classes)`` because the
            sparse categorical cross-entropy loss is used.
        hidden_layers (tuple): sizes of the hidden layers, in order.
        activation (str): hidden-layer activation. default is `relu`.
        dropout (float): dropout rate applied after every hidden layer; a
            falsy value disables dropout. default is 0.4.
        epochs (int): number of training epochs. default is 20.
        batch_size (int): batch size. default is 32.
        validation_split (float): fraction of the training data held out for
            validation. default is None (no split).
        validation_data (tuple): tuple of (X_valid, y_valid).
        patience (int): num of "bad epochs" to wait before stopping the
            training.
        shuffle (bool): shuffle training data before each epoch. default is
            true.
        optimizer (str): optimizer name. default is Adam.
        pt (str): checkpoint file path.

    Returns:
        ``keras.models.Sequential``
    """
    # The docstring promises support for 1d features; give them a column axis
    # so that ``X.shape[1]`` below is defined.
    if np.ndim(X) == 1:
        X = np.asarray(X).reshape(-1, 1)

    n_classes = np.unique(y).shape[0]
    is_binary = n_classes == 2

    model = Sequential()
    for position, units in enumerate(hidden_layers):
        if position == 0:
            model.add(Dense(units, activation=activation,
                            input_dim=X.shape[1]))
        else:
            # Later layers infer their input size from the previous layer.
            model.add(Dense(units, activation=activation))
        if dropout:
            model.add(Dropout(dropout))

    if is_binary:
        model.add(Dense(1, activation='sigmoid'))
        loss = 'binary_crossentropy'
    else:
        model.add(Dense(n_classes, activation='softmax'))
        # ``y`` is a 1d label vector, not one-hot, so the sparse variant of
        # the loss is required; plain categorical cross-entropy would fail
        # with a target/output shape mismatch.
        loss = 'sparse_categorical_crossentropy'
    model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])

    # Without any validation data there is no ``val_loss`` to monitor; fall
    # back to the training loss so the callbacks still do their job.
    has_validation = validation_data is not None or bool(validation_split)
    monitor = 'val_loss' if has_validation else 'loss'
    callbacks = [
        ReduceLROnPlateau(monitor=monitor),
        EarlyStopping(monitor=monitor, patience=patience),
        ModelCheckpoint(filepath=pt, monitor=monitor, save_best_only=True),
    ]

    model.fit(X, y,
              epochs=epochs,
              batch_size=batch_size,
              validation_split=validation_split or 0.0,
              validation_data=validation_data,
              shuffle=shuffle,
              callbacks=callbacks)
    return model
def neural_net_train(model, x_train, y_train, val_split=0.1, validation_data=None, fig_prefix=''):
    """Train the neural network and save its accuracy/loss curves.

    The model is fitted with early stopping, best-model checkpointing (to the
    module-level ``model_store_path``) and learning-rate reduction on plateau.
    After training, two EPS figures are written:
    ``fig_prefix + '#accuracy-curve.eps'`` and ``fig_prefix + '#loss-curve.eps'``.

    :param model: compiled Keras model; it must report an ``acc`` metric
        because the callbacks and plots read ``acc`` / ``val_acc``.
    :param x_train: training inputs.
    :param y_train: training targets.
    :param val_split: fraction of the training data held out for validation
        when ``validation_data`` is not given.
    :param validation_data: optional explicit validation set; when provided
        Keras uses it instead of ``val_split``.
    :param fig_prefix: prefix prepended to the figure file names.
    :return: the trained model.
    """
    print('Training the model: ' + model_store_path)

    # Early stopping, model checkpointing and learning-rate scheduling.
    callbacks_list = [
        # Stop training once validation accuracy has not improved for
        # 5 consecutive epochs.
        EarlyStopping(
            monitor='val_acc',
            patience=5,
        ),
        # Only overwrite the stored model when `val_acc` improves, so the
        # file always holds the best model seen during training.
        ModelCheckpoint(
            filepath=model_store_path,
            monitor='val_acc',
            save_best_only=True,
        ),
        # Divide the learning rate by 10 once validation loss has not
        # improved for 5 consecutive epochs.
        ReduceLROnPlateau(
            monitor='val_loss',
            factor=0.1,
            patience=5,
        ),
    ]

    # The callbacks monitor validation quantities, so `fit` needs validation
    # data: either the explicit `validation_data` or a split of the training
    # set.
    model.fit(x_train, y_train,
              batch_size=BATCH_SIZE,
              epochs=EPOCHES,
              callbacks=callbacks_list,
              validation_split=val_split,
              validation_data=validation_data)

    history = model.history.history
    acc = history['acc']
    val_acc = history['val_acc']
    n_epochs = len(acc)
    epochs = range(1, n_epochs + 1)

    # Accuracy curve during the training.
    fig = plt.figure()
    plt.plot(epochs, acc, '-ko', label='Train accuracy')
    plt.plot(epochs, val_acc, '-k^', label='Validation accuracy')
    plt.title('Train and validation accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend(['train', 'validation'], loc='lower right')
    plt.xlim((1, n_epochs + 1))
    plt.xticks(np.arange(1, n_epochs + 1, 1))
    fig.savefig(fig_prefix + '#accuracy-curve.eps')
    plt.close(fig)

    # Loss curve during the training.
    fig = plt.figure()
    plt.plot(epochs, history['loss'], '-ko')
    plt.plot(epochs, history['val_loss'], '-k^')
    plt.title('Train and validation loss')
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Validation'], loc='upper right')
    plt.xlim((1, n_epochs + 1))
    plt.xticks(np.arange(1, n_epochs + 1, 1))
    fig.savefig(fig_prefix + '#loss-curve.eps')
    plt.close(fig)

    return model
class_mode='categorical') # Max number of steps that these generator will have opportunity to process their source content # len(train_generator) should be 'no. of available train images / BATCH_SIZE_TRAINING' # len(valid_generator) should be 'no. of available train images / BATCH_SIZE_VALIDATION' (BATCH_SIZE_TRAINING, len(train_generator), BATCH_SIZE_VALIDATION, len(validation_generator)) # ### Train Our Model With Train (splitted) Data Set # Early stopping & checkpointing the best model in ../working dir & restoring that as our model for prediction from tensorflow.python.keras.callbacks import EarlyStopping, ModelCheckpoint cb_early_stopper = EarlyStopping(monitor='val_loss', patience=EARLY_STOP_PATIENCE) cb_checkpointer = ModelCheckpoint(filepath='working/best.hdf5', monitor='val_loss', save_best_only=True, mode='auto') # ### Start Training Network import time Time_start = time.time() fit_history = model.fit_generator( train_generator, steps_per_epoch=STEPS_PER_EPOCH_TRAINING, epochs=NUM_EPOCHS, validation_data=validation_generator, validation_steps=STEPS_PER_EPOCH_VALIDATION, callbacks=[cb_checkpointer, cb_early_stopper]) Time_end = time.time()
type_gen='test') # Design model model = create_model_pretrain(dim, n_sequence, n_channels, n_output) start_epoch = 0 # Load weight of unfinish training model(optional) load_model = False if load_model: weights_path = 'save_weight/weight-300-0.73-0.81.hdf5' # name of model start_epoch = 300 model.load_weights(weights_path) # Set callback validate_freq = 3 filepath = "save_weight/" + "weight-{epoch:02d}-{accuracy:.2f}-{val_accuracy:.2f}.hdf5" checkpoint = ModelCheckpoint(filepath, monitor='val_accuracy', verbose=1, save_best_only=False, period=validate_freq) callbacks_list = [checkpoint] # # Train model on dataset model.fit_generator(generator=training_generator, validation_data=validation_generator, epochs=600, callbacks=callbacks_list, initial_epoch=start_epoch, validation_freq=validate_freq)
def run(logger, options, session_log_file, logits_dir, models_dir):
    """Run the multistage knowledge-distillation experiment grid.

    The session log file is (re)created with a "begin test" timestamp, the
    experiment configuration is read from ``options.config_file_path`` and
    the data set named by ``cfg.dataset`` is loaded.  For every combination
    of size order, temperature and alpha the networks in the order are
    processed one after another: when a previous model exists the network is
    trained as a student on soft targets, otherwise it is trained as the
    starting teacher (unless its logits already exist).  One JSON line with
    the results is appended to ``session_log_file`` per combination.

    Any exception raised inside the grid is printed and logged but not
    re-raised, so the function returns normally after a failure.

    Args:
        logger: logger used for progress messages.
        options: object providing ``config_file_path`` and ``teacherModel``.
        session_log_file: path of the session log that receives the results.
        logits_dir: directory passed to the logit load/save helpers.
        models_dir: directory passed to ``saved_model``.

    Returns:
        None
    """
    logger.info(cfg.student_train_spacer + "GENERIC MULTISTAGE" + cfg.student_train_spacer)
    with open(session_log_file, "w") as f:
        f.write("begin test: " + datetime.datetime.now().isoformat() + "\n")
        # NOTE(review): redundant, the `with` block already closes the file.
        f.close()
    # load configuration file
    configuration = import_config(options.config_file_path)
    teacher_name = configuration['teacher_name']
    epochs = configuration['epochs']
    temperatures = configuration['temp_config']
    alphas = configuration['alpha_config']
    order_combinations = configuration['size_combinations']
    # loading training data
    X_train, Y_train, X_test, Y_test = load_dataset.load_dataset_by_name(
        logger, cfg.dataset)
    # mean subtraction regularization; the training mean is also applied to
    # the test set
    if cfg.subtract_pixel_mean is True:
        x_train_mean = np.mean(X_train, axis=0)
        X_train -= x_train_mean
        X_test -= x_train_mean
    # `datagen` only exists when one of the generator flags is set; it is only
    # used below under those same flags.
    if cfg.use_fit_generator_student is True or cfg.use_fit_generator_teacher is True:
        # data generator for on the fly training data manipulation
        datagen = ImageDataGenerator(
            # set input mean to 0 over the dataset
            featurewise_center=False,
            # set each sample mean to 0
            samplewise_center=False,
            # divide inputs by std of dataset
            featurewise_std_normalization=False,
            # divide each input by its std
            samplewise_std_normalization=False,
            # apply ZCA whitening
            zca_whitening=False,
            # epsilon for ZCA whitening
            zca_epsilon=1e-06,
            # randomly rotate images in the range (deg 0 to 180)
            rotation_range=0,
            # randomly shift images horizontally
            width_shift_range=0.1,
            # randomly shift images vertically
            height_shift_range=0.1,
            # set range for random shear
            shear_range=0.,
            # set range for random zoom
            zoom_range=0.,
            # set range for random channel shifts
            channel_shift_range=0.,
            # set mode for filling points outside the input boundaries
            fill_mode='nearest',
            # value used for fill_mode = "constant"
            cval=0.,
            # randomly flip images horizontally
            horizontal_flip=True,
            # randomly flip images vertically
            vertical_flip=False,
            # set rescaling factor (applied before any other transformation)
            rescale=None,
            # set function that will be applied on each input
            preprocessing_function=None,
            # image data format, either "channels_first" or "channels_last"
            data_format=None,
            # fraction of images reserved for validation (strictly between 0 and 1)
            validation_split=0.0)
        datagen.fit(X_train)
    try:
        for order in order_combinations:
            for temp in temperatures:
                for alpha in alphas:
                    # must clear the current session to free memory!
                    tf.keras.backend.clear_session()
                    K.clear_session()
                    logger.info(
                        "Clearing tensorflow/keras backend session and de-allocating remaining models..."
                    )
                    model = None
                    previousModel = None
                    # start from a pre-trained teacher when the configuration
                    # names one
                    if teacher_name is not None:
                        ssm = model_loader(logger, options.teacherModel)
                        previousModel = ssm.get_loaded_model()
                        teacher_name = options.teacherModel
                    # creating experiment metadata; results are collected in
                    # "experiment_results"
                    experiment_result = {
                        "experiment_results": []
                    }
                    experiment_metadata = create_meta(cfg.dataset, teacher_name, epochs, temp, alpha, order)
                    experiment_result['metadata'] = experiment_metadata
                    # performing experiment on given size, alpha, and temperature combination
                    for net_size in order:
                        model = None
                        # perform KD if there is a previously trained model to work with
                        if previousModel is not None:
                            model = knowledge_distillation_models.get_model(
                                cfg.dataset,
                                cfg.dataset_num_classes,
                                X_train,
                                net_size,
                            )
                            logger.info(
                                "loading soft targets for student training...")
                            print("previous model to load logits for: %s" % str(previousModel))
                            teacher_train_logits, teacher_test_logits = get_pretrained_teacher_logits(
                                logits_dir, previousModel, alpha, cfg.dataset, order)
                            Y_train_new, Y_test_new = teacher_utils.convert_logits_to_soft_targets(
                                temp, teacher_train_logits, teacher_test_logits, Y_train, Y_test)
                            # NOTE(review): a failed load is only logged;
                            # training below still proceeds - confirm intent.
                            if Y_train_new is None or Y_test_new is None:
                                logger.info(
                                    "soft targets not loaded correctly!")
                            else:
                                logger.info("completed")
                            model = helper_util.apply_knowledge_distillation_modifications(
                                logger, model, temp)
                            # model = multi_gpu_model(model, gpus=4)
                            optimizer = get_optimizer(
                                cfg.student_optimizer)
                            model.compile(
                                optimizer=optimizer,
                                loss=lambda y_true, y_pred: helper_util.knowledge_distillation_loss(
                                    logger, y_true, y_pred, alpha),
                                metrics=[helper_util.acc])
                            logger.info(
                                "training model...\norder:%s\nsize:%d\ntemp:%d\nalpha:%f"
                                % (order, net_size, temp, alpha))
                            # the best weights (highest val_acc) are written to
                            # cfg.checkpoint_path and reloaded after training
                            callbacks = [
                                EarlyStopping(monitor='val_acc',
                                              patience=50,
                                              min_delta=0.00007),
                                # ReduceLROnPlateau(monitor='val_acc', factor=0.1, patience=4, min_lr=0.0001),
                                ModelCheckpoint(cfg.checkpoint_path,
                                                monitor='val_acc',
                                                verbose=1,
                                                save_best_only=True,
                                                mode='max')
                            ]
                            if cfg.use_fit_generator_student is True:
                                model.fit(datagen.flow(
                                    X_train,
                                    Y_train_new,
                                    batch_size=cfg.student_batch_size),
                                    validation_data=(X_test, Y_test_new),
                                    epochs=epochs,
                                    verbose=1,
                                    callbacks=callbacks)
                            else:
                                model.fit(
                                    X_train,
                                    Y_train_new,
                                    batch_size=cfg.student_batch_size,
                                    epochs=epochs,
                                    verbose=1,
                                    callbacks=callbacks,
                                    validation_data=(X_test, Y_test_new))
                            # discard the distillation model and rebuild a
                            # plain model for evaluation
                            del model
                            model = knowledge_distillation_models.get_model(
                                cfg.dataset,
                                cfg.dataset_num_classes,
                                X_train,
                                net_size,
                            )
                            # load best model from checkpoint for evaluation
                            model.load_weights(cfg.checkpoint_path)
                            optimizer = get_optimizer(
                                cfg.student_optimizer)
                            model.compile(
                                optimizer=optimizer,
                                loss=logloss,  # the same as the custom loss function
                                metrics=['accuracy'])
                            # scores are computed against the original labels
                            train_score = model.evaluate(X_train,
                                                         Y_train,
                                                         verbose=0)
                            val_score = model.evaluate(X_test,
                                                       Y_test,
                                                       verbose=0)
                            result = create_result(net_size, temp, alpha, train_score, val_score)
                            logger.info(result)
                            experiment_result["experiment_results"].append(
                                result)
                            # save the trained model to the saved model directory
                            saved_model(logger, cfg.dataset, net_size, alpha, val_score, order, model, models_dir)
                            # every network except the last one in the order
                            # becomes the teacher of the next one
                            if order.index(net_size) < len(order) - 1:
                                # save soft targets
                                logger.info(
                                    "creating student training data...")
                                Y_train_new, Y_test_new = teacher_utils.createStudentTrainingData(
                                    model, temp, X_train, Y_train, X_test, Y_test)
                                save_pretrained_teacher_logits(
                                    logits_dir, net_size, alpha, Y_train_new, Y_test_new, cfg.dataset, order)
                                logger.info("done.")
                            else:
                                logger.info(
                                    "skipping creation of student training data, we are @ target model..."
                                )
                            # clear soft targets
                            Y_train_new = None
                            Y_test_new = None
                            # set model to current net size to preserve in previousModel
                            model = net_size
                        # if no previously trained model, train the network
                        else:
                            # load the already created soft targets
                            Y_train_new = None
                            Y_test_new = None
                            val_score = None
                            teacher_train_logits, teacher_test_logits = get_pretrained_teacher_logits(
                                logits_dir, net_size, alpha, cfg.dataset, order)
                            # train network if not previously created logits
                            if teacher_train_logits is None or teacher_test_logits is None:
                                if os.path.isfile(cfg.checkpoint_path):
                                    logger.info("removing previous checkpoint")
                                    os.remove(cfg.checkpoint_path)  # remove previous checkpoint
                                logger.info(
                                    "training teacher model...\norder:%s\nsize:%d\ntemp:%d\nalpha:%f"
                                    % (order, net_size, temp, alpha))
                                model = knowledge_distillation_models.get_model(
                                    cfg.dataset,
                                    cfg.dataset_num_classes,
                                    X_train,
                                    net_size,
                                )
                                # model.summary()
                                optimizer = get_optimizer(
                                    cfg.start_teacher_optimizer)
                                model.compile(
                                    optimizer=optimizer,
                                    loss=logloss,  # the same as the custom loss function
                                    metrics=['accuracy'])
                                # train network and save model with best validation accuracy to cfg.checkpoint_path
                                callbacks = [
                                    EarlyStopping(monitor='val_acc',
                                                  patience=50,
                                                  min_delta=0.00007),
                                    # ReduceLROnPlateau(monitor='val_acc', factor=0.1, patience=4, min_lr=0.0001),
                                    ModelCheckpoint(cfg.checkpoint_path,
                                                    monitor='val_acc',
                                                    verbose=1,
                                                    save_best_only=True,
                                                    mode='max')
                                ]
                                # NOTE(review): the teacher is trained with
                                # cfg.student_batch_size - confirm intent.
                                if cfg.use_fit_generator_teacher is True:
                                    model.fit(datagen.flow(
                                        X_train,
                                        Y_train,
                                        batch_size=cfg.student_batch_size),
                                        validation_data=(X_test, Y_test),
                                        epochs=epochs,
                                        verbose=1,
                                        callbacks=callbacks)
                                else:
                                    model.fit(
                                        X_train,
                                        Y_train,
                                        validation_data=(X_test, Y_test),
                                        batch_size=cfg.student_batch_size,
                                        epochs=epochs,
                                        verbose=1,
                                        callbacks=callbacks)
                                # load best model from checkpoint for evaluation
                                del model
                                model = knowledge_distillation_models.get_model(
                                    cfg.dataset,
                                    cfg.dataset_num_classes,
                                    X_train,
                                    net_size,
                                )
                                model.load_weights(cfg.checkpoint_path)
                                optimizer = get_optimizer(
                                    cfg.start_teacher_optimizer)
                                model.compile(
                                    optimizer=optimizer,
                                    loss=logloss,  # the same as the custom loss function
                                    metrics=['accuracy'])
                                # evaluate network
                                train_score, val_score = helper_util.calculate_unweighted_score(
                                    logger, model, X_train, Y_train, X_test, Y_test)
                                # save evaluation (no temperature/alpha for the teacher)
                                result = create_result(net_size, None, None, train_score, val_score)
                                logger.info(result)
                                experiment_result["experiment_results"].append(
                                    result)
                                # student training data is only needed when
                                # the order contains more than one network
                                if len(order) != 1:
                                    logger.info(
                                        "creating student training data...")
                                    teacher_train_logits, teacher_test_logits = teacher_utils.createStudentTrainingData(
                                        model, temp, X_train, Y_train, X_test, Y_test)
                                    save_pretrained_teacher_logits(
                                        logits_dir, net_size, alpha, teacher_train_logits, teacher_test_logits, cfg.dataset, order)
                                    logger.info(
                                        "done creating student training data.")
                                # save the trained model to the saved model directory
                                saved_model(logger, cfg.dataset, net_size, alpha, val_score, order, model, models_dir)
                            else:
                                # logits already exist: skip training
                                model = net_size
                        # only the size of the finished network is kept; it
                        # identifies the teacher in the following KD training
                        del previousModel  # free memory
                        previousModel = net_size  # previously trained model becomes teacher
                    # appending experiment result to log file
                    if os.path.isfile(session_log_file):
                        open_type = 'a'
                    else:
                        open_type = 'w'
                    with open(session_log_file, open_type) as f:
                        f.write(json.dumps(experiment_result))
                        f.write("\n")
                        # NOTE(review): redundant, the `with` block already closes the file.
                        f.close()
                    # printing the results of training
                    logger.info(cfg.student_train_spacer)
                    # free model variables for next configuration iteration
                    del model
                    del previousModel
        logger.info('-- COMPLETE')
    except Exception:
        # NOTE(review): the error is printed and logged but swallowed, so
        # callers cannot detect a failed run - confirm this is intended.
        traceback.print_exc()
        error = traceback.format_exc()
        logging.error('Error encountered: %s' % error, exc_info=True)
train_y=train_y[0:train_size,:] train_index=train_index[0:train_size,:] train_label = train_label[0:train_size,:] val_size=100000 dir = '../dataset/Qbit/Q1val.mat' y_c = sio.loadmat(dir)['Y'] val_y=np.real(y_c*np.conj(y_c)) val_index = sio.loadmat(dir)['Index'] val_label = sio.loadmat('../dataset/channel/channel_val.mat')['Rad'] val_y=val_y[0:val_size,:] val_index=val_index[0:val_size,:] val_label = val_label[0:val_size,:] path = './model/Qbit_1.h5' checkpoint = ModelCheckpoint(path, monitor='val_loss', verbose=1, save_best_only=True, mode='min', save_weights_only=True) model = channel_estimation() # model.fit([train_y,train_index], train_label, batch_size=2048, epochs=100, verbose=2, # callbacks=[checkpoint], validation_data=([val_y,val_index], val_label), shuffle=True) model.load_weights(path) test_size=10000 test_label=sio.loadmat('../dataset/channel/channel_test.mat')['Rad'] test_label=test_label[0:test_size,:] Qbit=np.arange(1,8,1) # Fig6, curve6, q_tr=1 MSE = [] for Q in Qbit: dir = '../dataset/Qbit/pnr0/Q{Q}.mat' dir = dir.format(Q=Q)
for i in predictions: class_weights[c] = weight[c] c += 1 print(class_weights) NUM_EPOCHS = 60 BATCH_SIZE = 8 num_train_images = 5000 adam = Adam(lr=0.00001) finetune_model.compile(adam, loss='categorical_crossentropy', metrics=['accuracy']) filepath = 'C:\\Users\\Nipun\\Documents\\My Projects\\Hackerearth\\' + 'ResNet50\\' + '_model_weights.h5' checkpoint = ModelCheckpoint(filepath, monitor=["acc"], verbose=1, mode='max') callbacks_list = [checkpoint] history = finetune_model.fit_generator(train_generator, epochs=NUM_EPOCHS, workers=8, steps_per_epoch=num_train_images // BATCH_SIZE, shuffle=True, callbacks=callbacks_list, class_weight=class_weights) # Plot the training and validation loss + accuracy def plot_training(history): acc = history.history['acc']
# Train on multiple GPUs
# from tensorflow.keras.utils import multi_gpu_model
# model = multi_gpu_model(model, gpus = 2)

# Build the network with the softmax head; change the loss to 'arcface' for
# fine-tuning.
model = mobile_face_net_train(NUM_LABELS, loss='softmax')
model.summary()

model.compile(optimizer=Adam(lr=0.001, epsilon=1e-8),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Save the model whenever the monitored quantity improves
# (save_best_only=True), so the file always holds the best model so far.
check_pointer = ModelCheckpoint(filepath='../Models/MobileFaceNet_train.h5',
                                verbose=1,
                                save_best_only=True)

# Interrupt the training when the validation loss is not decreasing. With a
# patience of 10000 epochs this is effectively disabled.
early_stopping = EarlyStopping(monitor='val_loss', patience=10000)


class LossHistory(Callback):
    """Keras callback that records the training loss after every batch.

    The values are collected in ``self.losses``, which is reset at the start
    of each training run.
    """

    def on_train_begin(self, logs=None):
        """Start a fresh loss record for this training run."""
        self.losses = []

    def on_batch_end(self, batch, logs=None):
        """Append the loss of the finished batch (``None`` if not reported)."""
        # `logs` defaults to None instead of a shared mutable dict.
        self.losses.append((logs or {}).get('loss'))
def train_model(train_images, train_depths, test_images, test_depths, save_best=True,
                epochs=1500, batch_size=128):
    """Train a convolutional network mapping grayscale images to depth maps.

    Args:
        train_images: training inputs of shape (N, 75, 284, 1).
        train_depths: training depth-map targets.
        test_images: inputs used as validation data during training.
        test_depths: depth-map targets used as validation data.
        save_best: when true, the weights with the lowest validation RMSE are
            written to ``best_depth_weights_last_run.hdf5``.
        epochs: number of training epochs (default 1500).
        batch_size: training batch size (default 128).

    Returns:
        Tuple ``(model, history)``: the trained model and the Keras history
        object returned by ``fit``.
    """
    # Let the first GPU grow its memory on demand instead of allocating it
    # statically; this resolves some intermittent initialization errors.
    physical_devices = tf.config.list_physical_devices('GPU')
    if physical_devices:
        try:
            tf.config.experimental.set_memory_growth(physical_devices[0], True)
        except (ValueError, RuntimeError):
            # Invalid device or cannot modify virtual devices once initialized.
            pass

    print("Training...")

    # fix random seed for reproducibility
    seed = 7
    np.random.seed(seed)

    # Convolutional autoencoder to transform a grayscale image to an inferred
    # depth map.
    model = Sequential()
    model.add(
        Conv2D(64, kernel_size=7, padding='same', activation='relu',
               input_shape=(75, 284, 1)))
    # 3x3 pooling with 'same' padding: 75x284 -> 25x95.
    model.add(MaxPooling2D((3, 3), padding='same'))
    model.add(Conv2D(64, kernel_size=3, padding='same', activation='relu'))
    for filters in (128, 128, 256, 256, 256):
        model.add(Conv2D(filters, kernel_size=3, padding='same', activation='relu'))
    # Upsampling restores 25x95 -> 75x285; the crop removes the one extra
    # column so the output matches the 75x284 input size.
    model.add(UpSampling2D((3, 3)))
    model.add(Conv2D(1, kernel_size=1, activation='relu', padding='same'))
    model.add(Cropping2D(cropping=((0, 0), (0, 1)), data_format=None))

    # Compile model; Keras expects `metrics` to be a list.
    model.compile(optimizer='adadelta',
                  loss='mse',
                  metrics=[tf.keras.metrics.RootMeanSquaredError(name='rmse')])

    # Save the weights with lowest RMSE
    callbacks_list = []
    if save_best:
        filepath = "best_depth_weights_last_run.hdf5"
        checkpoint = ModelCheckpoint(filepath,
                                     monitor='val_rmse',
                                     verbose=1,
                                     save_best_only=True,
                                     mode='min')
        callbacks_list.append(checkpoint)

    # Train model
    history = model.fit(train_images,
                        train_depths,
                        batch_size=batch_size,
                        epochs=epochs,
                        validation_data=(test_images, test_depths),
                        verbose=1,
                        callbacks=callbacks_list)
    return model, history
# Split boundaries are fractions of the full data set: the validation slice
# follows the training slice and everything after it is the test slice.
dataSetValY = dataSetY[int(dataForTrainingPercent*len(dataSetY)):int((dataForTrainingPercent + dataForValidatingPercent)*len(dataSetY))]
dataSetTestX = dataSetX[int((dataForTrainingPercent + dataForValidatingPercent)*len(dataSetX)):]
dataSetTestY = dataSetY[int((dataForTrainingPercent + dataForValidatingPercent)*len(dataSetY)):]

# Inputs are reshaped to single-channel 128x128 images; labels are encoded
# with fit_trasform against the full genre list.
trainX = numpy.asarray(dataSetTrainX)
trainX = trainX.reshape([-1, 128, 128, 1])
trainY = fit_trasform(dataSetTrainY, getAllGenres())

validX = numpy.asarray(dataSetValX)
validX = validX.reshape([-1, 128, 128, 1])
validY = fit_trasform(dataSetValY, getAllGenres())

testX = numpy.asarray(dataSetTestX)
testX = testX.reshape([-1, 128, 128, 1])
testY = fit_trasform(dataSetTestY, getAllGenres())

# Keep the model with the best validation accuracy and log to TensorBoard.
checkpoint = ModelCheckpoint("best_model_ever", monitor='val_acc', verbose=1, save_best_only=True, mode='max')
tensorboard = TensorBoard(log_dir='./logs', histogram_freq=0, write_graph=True, write_images=False)
callbacks_list = [checkpoint, tensorboard]

model.fit(trainX, trainY, validation_data=(validX, validY), epochs=20, batch_size=128, shuffle=True, callbacks=callbacks_list)
print("model succesfully trained")
model.save("model")

# evaluate() returns [loss, metric, ...]; index 0 is the loss, so the
# accuracy is the second entry.
# NOTE(review): assumes accuracy is the first compiled metric (the
# checkpoint monitors 'val_acc') - confirm against model.compile().
testLoss, testAccu = model.evaluate(testX, testY)[:2]
print(testAccu)
def __init__(self, data, periods=288, batch_size=64, sequence_length=20, warmup_steps=50, epochs=20, display=False):
    """Instantiate the class: prepare data, then build, train and evaluate the model.

    Args:
        data: Dict of values keyed by timestamp
        periods: Number of timestamp data points per vector
        batch_size: Size of batch
        sequence_length: Length of vectors for each target
        warmup_steps: Stored as ``self.warmup_steps``; presumably the number
            of leading steps ignored by ``self.loss_mse_warmup`` - confirm
        epochs: Number of training epochs
        display: Stored as ``self.display``; not used in this method

    Returns:
        None
    """
    # Initialize key variables
    self.periods = periods
    self.target_names = ['value']
    self.warmup_steps = warmup_steps
    self.epochs = epochs
    self.batch_size = batch_size
    self.display = display

    ###################################
    # TensorFlow session configuration
    config = tf.ConfigProto()

    # Don't pre-allocate memory; allocate as-needed
    config.gpu_options.allow_growth = True

    # Only allow up to 95% of the GPU memory to be allocated
    config.gpu_options.per_process_gpu_memory_fraction = 0.95

    # Crash with DeadlineExceeded instead of hanging forever when your
    # queues get full/empty
    config.operation_timeout_in_ms = 60000

    # Create a session with the above options specified.
    backend.tensorflow_backend.set_session(tf.Session(config=config))
    ###################################

    # Get data
    (x_data, y_data) = convert_data(data, periods, self.target_names)

    print('\n> Numpy Data Type: {}'.format(type(x_data)))
    print("> Numpy Data Shape: {}".format(x_data.shape))
    print("> Numpy Data Row[0]: {}".format(x_data[0]))
    print('> Numpy Targets Type: {}'.format(type(y_data)))
    print("> Numpy Targets Shape: {}".format(y_data.shape))

    # Number of observations (data-points or samples) in the data-set.
    num_data = len(x_data)

    # Fraction of the data-set used for the training-set; the remainder is
    # the test-set.
    train_split = 0.9
    self.num_train = int(train_split * num_data)
    num_test = num_data - self.num_train

    print('> Number of Samples: {}'.format(num_data))
    print("> Number of Training Samples: {}".format(self.num_train))
    print("> Number of Test Samples: {}".format(num_test))

    # Create test and training data (chronological split, no shuffling)
    x_train = x_data[0:self.num_train]
    x_test = x_data[self.num_train:]
    self.y_train = y_data[0:self.num_train]
    self.y_test = y_data[self.num_train:]

    self.num_x_signals = x_data.shape[1]
    self.num_y_signals = y_data.shape[1]

    print("> Training Minimum Value:", np.min(x_train))
    print("> Training Maximum Value:", np.max(x_train))

    # steps_per_epoch is the number of batch iterations before a training
    # epoch is considered finished.
    self.steps_per_epoch = int(self.num_train / batch_size) + 1
    print("> Epochs:", epochs)
    print("> Batch Size:", batch_size)
    print("> Steps:", self.steps_per_epoch)

    # Report the size of the input array.
    print("> Data size: {:.2f} Bytes".format(x_data.nbytes))

    # The network's sigmoid output lies between 0 and 1, so the data is
    # scaled first. The input scaler is fitted on the training data only and
    # then applied to the test data.
    x_scaler = MinMaxScaler()
    self.x_train_scaled = x_scaler.fit_transform(x_train)
    print('> Scaled Training Minimum Value: {}'.format(
        np.min(self.x_train_scaled)))
    print('> Scaled Training Maximum Value: {}'.format(
        np.max(self.x_train_scaled)))

    self.x_test_scaled = x_scaler.transform(x_test)

    # The targets may have a different value range than the inputs, so they
    # get a separate scaler. It is kept on the instance.
    self.y_scaler = MinMaxScaler()
    self.y_train_scaled = self.y_scaler.fit_transform(self.y_train)
    y_test_scaled = self.y_scaler.transform(self.y_test)

    # Data Generator
    print('> Scaled Training Data Shape: {}'.format(
        self.x_train_scaled.shape))
    print('> Scaled Training Targets Shape: {}'.format(
        self.y_train_scaled.shape))

    # We then create the batch-generator.
    generator = self.batch_generator(batch_size, sequence_length)

    # Validation Set
    # To limit overfitting, performance is monitored on the test-set after
    # each epoch and weights are only saved when it improves. The whole
    # test-set is used as one long sequence, so a batch axis is added.
    validation_data = (np.expand_dims(self.x_test_scaled, axis=0),
                       np.expand_dims(y_test_scaled, axis=0))

    # Create the Recurrent Neural Network
    self.model = Sequential()

    # Gated Recurrent Unit with 512 outputs per time-step. As the first
    # layer it declares the input shape: sequences of arbitrary length
    # (None) with num_x_signals input-signals per observation.
    self.model.add(
        GRU(units=512,
            return_sequences=True,
            input_shape=(
                None,
                self.num_x_signals,
            )))

    # Dense layer mapping the 512 GRU values to num_y_signals outputs. The
    # targets were scaled to [0, 1], so the output is squashed to the same
    # range with a sigmoid. The drawback is that predictions can never leave
    # the value range seen in the training data; a linear output layer would
    # lift that limit but may need smaller initial weights to avoid NaNs.
    self.model.add(Dense(self.num_y_signals, activation='sigmoid'))

    # Compile Model
    # Optimizer with the starting learning-rate, and the custom warm-up loss.
    optimizer = RMSprop(lr=1e-3)
    self.model.compile(loss=self.loss_mse_warmup, optimizer=optimizer)

    # summary() prints the table itself and returns None, so it is called
    # directly rather than wrapped in print().
    print('> Model Summary:\n')
    self.model.summary()

    # Callback Functions
    # Checkpoint: save the weights whenever the validation loss improves.
    path_checkpoint = '/tmp/23_checkpoint.keras'
    callback_checkpoint = ModelCheckpoint(filepath=path_checkpoint,
                                          monitor='val_loss',
                                          verbose=1,
                                          save_weights_only=True,
                                          save_best_only=True)

    # Stop the optimization when the validation loss has not improved for
    # 5 epochs.
    callback_early_stopping = EarlyStopping(monitor='val_loss',
                                            patience=5,
                                            verbose=1)

    # Write the TensorBoard log during training.
    callback_tensorboard = TensorBoard(log_dir='/tmp/23_logs/',
                                       histogram_freq=0,
                                       write_graph=False)

    # Reduce the learning-rate as soon as the validation loss has not
    # improved since the last epoch (patience=0). Starting from 1e-3, one
    # multiplication by 0.1 reaches the floor of 1e-4.
    callback_reduce_lr = ReduceLROnPlateau(monitor='val_loss',
                                           factor=0.1,
                                           min_lr=1e-4,
                                           patience=0,
                                           verbose=1)

    callbacks = [
        callback_early_stopping, callback_checkpoint,
        callback_tensorboard, callback_reduce_lr
    ]

    # Train the Recurrent Neural Network
    # An "epoch" here is steps_per_epoch generator batches rather than one
    # pass over the training-set.
    print('\n> Starting data training\n')

    try:
        self.model.fit_generator(generator=generator,
                                 epochs=self.epochs,
                                 steps_per_epoch=self.steps_per_epoch,
                                 validation_data=validation_data,
                                 callbacks=callbacks)
    except Exception as error:
        print('\n>{}\n'.format(error))
        traceback.print_exc()
        # Exit with a failure status; exiting with 0 would report success.
        sys.exit(1)

    # Load Checkpoint
    # With early stopping the final weights may be worse than the best ones,
    # so the last saved checkpoint is reloaded.
    print('> Loading model weights')
    try:
        self.model.load_weights(path_checkpoint)
    except Exception as error:
        print('\n> Error trying to load checkpoint.\n\n{}'.format(error))
        traceback.print_exc()
        sys.exit(1)

    # Performance on Test-Set
    # Evaluated on the same batched test sequence used for validation.
    result = self.model.evaluate(x=validation_data[0],
                                 y=validation_data[1])

    print('> Loss (test-set): {}'.format(result))
# Experiment configuration for the Seq2Seq model; most of these values are
# forwarded to VaeGen / Seq2Seq below.
train_ratio = 0.8
seq_length = 128
output_count = 32
output_count_for_test = 32  # not referenced anywhere in the visible code
pred_length = 4  # forwarded to VaeGen as test_output_count
symbols = ["EURUSD", "GBPUSD", "EURGBP"]
layers = [50, 50]
n_pp = 100
batch_size = 128
epochs = 100
mfile = './SavedModel/RNN/Seq2SeqLSTM.h5'
# Checkpoint callback: keeps only the best result so far and stores weights
# only (not the full model) at `mfile`.
model_saver = ModelCheckpoint(mfile,
                              save_best_only=True,
                              save_weights_only=True)
gen = VaeGen(train_ratio,
             seq_length,
             output_count,
             symbols,
             test_output_count=pred_length,
             last_date=datetime(2019, 5, 28),
             num_samples=92160,
             timeframe=MetaTrader5.MT5_TIMEFRAME_M1)
test = Seq2Seq(layers, n_pp)
test.compile(gen.input_dim, gen.output_dim, optimizer='adam', loss='mae')
# Training is disabled here: `model_saver`, `batch_size` and `epochs` are only
# consumed by the commented-out call below.
# test.train_model.fit([gen.trainX, gen.trainY], gen.trainY, batch_size=batch_size, epochs=epochs, callbacks=[model_saver],
#                      validation_data=([gen.validX, gen.validY], gen.validY))
# Shuffle with a fixed seed so the train/validation split is the same on
# every run, then drop the fixed seed again.
np.random.seed(10101)
np.random.shuffle(lines)
np.random.seed(None)

# 90% of the samples are used for training, 10% for validation.
num_val = int(len(lines) * 0.1)
num_train = len(lines) - num_val

# Build the AlexNet model.
model = AlexNet()

# Checkpointing: considered once every 3 epochs, keeping only the best
# result according to the monitored training accuracy ('acc').
checkpoint_period1 = ModelCheckpoint(
    log_dir + 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5',
    monitor='acc',
    save_weights_only=False,
    save_best_only=True,
    period=3
)

# Learning-rate schedule: if 'acc' has not improved for 3 epochs, halve the
# learning rate and keep training.
reduce_lr = ReduceLROnPlateau(
    monitor='acc',
    factor=0.5,
    patience=3,
    verbose=1
)

# Early stopping: once val_loss stops decreasing the model is considered
# essentially trained, so training can stop.
early_stopping = EarlyStopping(
    monitor='val_loss',
    min_delta=0,
    patience=10,
# 3. model fitting config
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# 4. model check point
# Every run starts from an empty ./model directory: a leftover directory from
# a previous run is removed before it is recreated.
model_directory = os.path.join(os.getcwd(), 'model')
if os.path.exists(model_directory):
    shutil.rmtree(model_directory)
os.mkdir(model_directory)

checkpoint = ModelCheckpoint(
    filepath=os.path.join(model_directory, '{epoch:03d}-{val_loss:.4f}.h5'),
    # Possible monitors: val_loss (test-set error), loss (training-set error),
    # val_accuracy (test-set accuracy), accuracy (training-set accuracy).
    monitor='val_loss',
    verbose=1,
    save_best_only=True
)

# 5. model fitting
history = model.fit(x, t, validation_split=0.2, epochs=200, batch_size=100, verbose=0, callbacks=[checkpoint])

# 6. result
result = model.evaluate(x, t, verbose=0)
print(f'\n(Loss, Accuracy) = ({result[0]}, {result[1]})')

# 7. graph
val_loss = history.history['val_loss']
accuracy = history.history['accuracy']
model.add(Dense(1, activation='sigmoid'))

# Optimize with Adam at a learning rate of 0.001.
optimizer = Adam(lr=1e-3)
model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
# summary() prints the table itself and returns None, so it is called
# directly rather than wrapped in print().
model.summary()

# Checkpoint for the weights: only the best val_loss so far is kept.
path_checkpoint = 'sentiment_checkpoint.keras'
checkpoint = ModelCheckpoint(filepath=path_checkpoint,
                             monitor='val_loss',
                             verbose=1,
                             save_weights_only=True,
                             save_best_only=True)

# Try to resume from previously trained weights. A failed load (for example
# no checkpoint file yet) is reported but is not fatal: training then starts
# from the freshly initialised model.
try:
    model.load_weights(path_checkpoint)
except Exception as e:
    print(e)
# The default graph is captured whether or not the weights could be loaded.
graph = tf.get_default_graph()

# Early stopping: stop training if the validation loss has not improved
# within 3 epochs.
earlystopping = EarlyStopping(monitor='val_loss', patience=3, verbose=1)
# Automatically reduce the learning rate
model = build_model(x_train.shape[1:], 2)
print(x_train.shape[1:])  # (128, 862)
model.summary()

# Compile and train
model.compile(optimizer='adam', loss="sparse_categorical_crossentropy", metrics=['acc'])
es = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True, verbose=1)
# Halve the learning rate after 10 epochs without val_loss improvement. The
# keyword must be spelled `factor`; a misspelled keyword would leave the
# callback on its default reduction factor.
lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, verbose=1)
path = 'C:/nmb/nmb_data/h5/new_Conv1D_mfcc2.h5'
mc = ModelCheckpoint(path, monitor='val_loss', verbose=1, save_best_only=True)
history = model.fit(x_train, y_train,
                    epochs=300,
                    batch_size=16,
                    validation_split=0.2,
                    callbacks=[es, lr, mc])

# Evaluate and predict
# model = load_model('C:/nmb/nmb_data/h5/new_Conv1D_mels2.h5')
result = model.evaluate(x_test, y_test, batch_size=16)
print("loss : ", result[0])
print("acc : ", result[1])

pred_pathAudio = 'C:/nmb/nmb_data/pred_voice/'
y_va_age = to_categorical(y_va_age)

# Each input set consists of three aligned views of the same sequences:
#   current - the sequence itself
#   left    - shifted right by one step (first column repeated at the front)
#   right   - shifted left by one step (last column repeated at the end)
# Indexing with a one-element list keeps the column axis, so the repeated
# column can be stacked directly.
x_train_current = x_train_age
x_train_left = np.hstack([x_train_age[:, [0]], x_train_age[:, :-1]])
x_train_right = np.hstack([x_train_age[:, 1:], x_train_age[:, [-1]]])

print('x_train_current 维度:', x_train_current.shape)
print('x_train_left 维度:', x_train_left.shape)
print('x_train_right 维度:', x_train_right.shape)

x_val_current = x_va_age
x_val_left = np.hstack([x_va_age[:, [0]], x_va_age[:, :-1]])
x_val_right = np.hstack([x_va_age[:, 1:], x_va_age[:, [-1]]])

print('开始RCNN建模......')
# Vocabulary size (one extra index beyond the word2index entries).
max_features = len(word2index) + 1
rcnn = RCNN(maxlen, max_features, embedding_dims, 7, 'softmax')
model = rcnn.get_model()

# Optimizer, loss and evaluation metric.
model.compile('adam', 'categorical_crossentropy', metrics=['accuracy'])

print('训练...')
my_callbacks = [
    ModelCheckpoint(model_path + 'rcnn_model_age.h5', verbose=1),
    EarlyStopping(monitor='val_accuracy', patience=2, mode='max'),
]

# Fit on the three-view inputs, validating on the held-out split.
train_inputs = [x_train_current, x_train_left, x_train_right]
val_inputs = [x_val_current, x_val_left, x_val_right]
history = model.fit(train_inputs,
                    y_train_age,
                    batch_size=batch_size,
                    epochs=epochs,
                    callbacks=my_callbacks,
                    validation_data=(val_inputs, y_va_age))
def train_fn(model_bytes):
    """Run one Horovod worker's share of the training.

    Deserializes the model from ``model_bytes``, scales its learning rate by
    the number of Horovod processes, trains for 10 epochs on petastorm
    datasets sharded by rank, and checkpoints to a temporary directory on
    rank 0.

    Args:
        model_bytes: Serialized model, passed to ``deserialize_model``
            together with ``hvd.load_model``.

    Returns:
        On rank 0, a tuple ``(history.history, checkpoint_bytes)`` where
        ``checkpoint_bytes`` is the raw content of the checkpoint file.
        On every other rank the function falls through and returns ``None``.
    """
    # Make sure pyarrow is referenced before anything else to avoid segfault due to conflict
    # with TensorFlow libraries.  Use `pa` package reference to ensure it's loaded before
    # functions like `deserialize_model` which are implemented at the top level.
    # See https://jira.apache.org/jira/browse/ARROW-3346
    pa

    import atexit
    import horovod.tensorflow.keras as hvd
    import os
    from petastorm import make_batch_reader
    from petastorm.tf_utils import make_petastorm_dataset
    import tempfile
    import tensorflow as tf
    import tensorflow.keras.backend as K
    import shutil

    # Horovod: initialize Horovod inside the trainer.
    hvd.init()

    # Horovod: pin GPU to be used to process local rank (one GPU per process), if GPUs are available.
    config = tf.ConfigProto(intra_op_parallelism_threads=0,
                            inter_op_parallelism_threads=0,
                            allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    config.gpu_options.visible_device_list = str(hvd.local_rank())
    K.set_session(tf.Session(config=config))

    # Horovod: restore from checkpoint, use hvd.load_model under the hood.
    model = deserialize_model(model_bytes, hvd.load_model)

    # Horovod: adjust learning rate based on number of processes.
    K.set_value(model.optimizer.lr, K.get_value(model.optimizer.lr) * hvd.size())

    # Horovod: print summary logs on the first worker.
    # NOTE(review): the condition enables logging on ranks 0 AND 1, which
    # does not match "first worker" - confirm which is intended.
    verbose = 2 if hvd.rank() == 0 or hvd.rank() == 1 else 0

    callbacks = [
        # Horovod: broadcast initial variable states from rank 0 to all other processes.
        # This is necessary to ensure consistent initialization of all workers when
        # training is started with random weights or restored from a checkpoint.
        hvd.callbacks.BroadcastGlobalVariablesCallback(root_rank=0),

        # Horovod: average metrics among workers at the end of every epoch.
        #
        # Note: This callback must be in the list before the ReduceLROnPlateau,
        # TensorBoard, or other metrics-based callbacks.
        hvd.callbacks.MetricAverageCallback(),

        # Horovod: using `lr = 1.0 * hvd.size()` from the very beginning leads to worse final
        # accuracy. Scale the learning rate `lr = 1.0` ---> `lr = 1.0 * hvd.size()` during
        # the first five epochs. See https://arxiv.org/abs/1706.02677 for details.
        hvd.callbacks.LearningRateWarmupCallback(warmup_epochs=5, verbose=verbose),

        # Stop training when val_loss has not improved for 5 epochs. The
        # commented-out entries are earlier alternatives kept for reference.
        # tf.keras.callbacks.ReduceLROnPlateau(monitor='val_exp_rmspe', patience=10, verbose=verbose),
        EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=5)
        #tf.keras.callbacks.EarlyStopping(monitor='val_exp_rmspe', mode='min', patience=20, verbose=verbose),
        #tf.keras.callbacks.TerminateOnNaN()
    ]

    # Model checkpoint location: a fresh temporary directory that is removed
    # again when the process exits.
    ckpt_dir = tempfile.mkdtemp()
    #ckpt_dir = os.path.join(os.getcwd(), 'checkpoints')
    ckpt_file = os.path.join(ckpt_dir, 'checkpoint.h5')
    atexit.register(lambda: shutil.rmtree(ckpt_dir))

    # Horovod: save checkpoints only on the first worker to prevent other workers from corrupting them.
    # NOTE(review): the checkpoint tracks the training 'loss' while early
    # stopping tracks 'val_loss' - confirm this asymmetry is intended.
    if hvd.rank() == 0:
        callbacks.append(ModelCheckpoint(ckpt_file, monitor='loss', verbose=1, save_best_only=True))

    # Make Petastorm readers. Each worker reads only its own shard
    # (cur_shard=rank, shard_count=size).
    with make_batch_reader('%s/train/train_df.parquet' % DATA_LOCATION, num_epochs=None,
                           cur_shard=hvd.rank(), shard_count=hvd.size(),
                           hdfs_driver=PETASTORM_HDFS_DRIVER) as train_reader:
        with make_batch_reader('%s/validation/validation_df.parquet' % DATA_LOCATION, num_epochs=None,
                               cur_shard=hvd.rank(), shard_count=hvd.size(),
                               hdfs_driver=PETASTORM_HDFS_DRIVER) as val_reader:
            # Convert readers to tf.data.Dataset: un-batch the reader output,
            # decode each record into an (image, mask) pair via the external
            # decode_image / decode_mask helpers, then re-batch. Only the
            # training set is shuffled.
            train_ds = make_petastorm_dataset(train_reader) \
                .apply(tf.data.experimental.unbatch()) \
                .shuffle(int(TRAIN_ROWS / hvd.size())) \
                .map(lambda tensor: (tf.py_func(decode_image, [tensor], tf.uint8), tf.py_func(decode_mask, [tensor], tf.uint8))) \
                .batch(BATCH_SIZE)

            # Debugging aid (disabled): pull one sample from the dataset.
            #tf.print(tf.shape(train_ds))
            #iterator = train_ds.make_one_shot_iterator()
            #tensor = iterator.get_next()
            #with tf.Session() as sess:
                #sample = sess.run(tensor)
                #print(sample)

            val_ds = make_petastorm_dataset(val_reader) \
                .apply(tf.data.experimental.unbatch()) \
                .map(lambda tensor: (tf.py_func(decode_image, [tensor], tf.uint8), tf.py_func(decode_mask, [tensor], tf.uint8))) \
                .batch(BATCH_SIZE)

            # Step counts are divided by the number of workers because each
            # worker only sees its own shard.
            history = model.fit(train_ds,
                                validation_data=val_ds,
                                steps_per_epoch=int(TRAIN_ROWS / BATCH_SIZE / hvd.size()),
                                validation_steps=int(VAL_ROWS / BATCH_SIZE / hvd.size()),
                                callbacks=callbacks,
                                verbose=verbose,
                                epochs=10)

    # Dataset API usage currently displays a wall of errors upon termination.
    # This global model registration ensures clean termination.
    # Tracked in https://github.com/tensorflow/tensorflow/issues/24570
    globals()['_DATASET_FINALIZATION_HACK'] = model

    # Only rank 0 wrote a checkpoint, so only rank 0 returns a result.
    if hvd.rank() == 0:
        with open(ckpt_file, 'rb') as f:
            return history.history, f.read()
# Training callbacks, all driven by the validation loss:
#   - stop after 16 epochs without an improvement of at least 1e-5,
#   - multiply the learning rate by 0.1 after 7 stagnant epochs,
#   - keep only the best weights under <root_dir>weights/<weight_name>,
#   - progress output (TQDMCallback) and TensorBoard logs in a directory
#     named after the weight file (text before its first '.').
callbacks = [
    EarlyStopping(monitor='val_loss',
                  patience=16,
                  verbose=1,
                  min_delta=0.00001,
                  mode='min'),
    # NOTE(review): `epsilon` is the older spelling of `min_delta` for this
    # callback - confirm it is still accepted by the installed Keras version.
    ReduceLROnPlateau(monitor='val_loss',
                      factor=0.1,
                      patience=7,
                      verbose=1,
                      epsilon=0.0001,
                      mode='min'),
    ModelCheckpoint(monitor='val_loss',
                    filepath=root_dir + 'weights/' + weight_name,
                    save_best_only=True,
                    save_weights_only=True,
                    mode='min'),
    TQDMCallback(),
    TensorBoard(log_dir=root_dir + weight_name.split('.')[0],
                histogram_freq=0,
                write_graph=True,
                write_images=True)
]

history = model.fit_generator(
    generator=train_generator(batch_size),
    # One "epoch" covers 1/18 of the batches in train_df.
    steps_per_epoch=int((train_df.shape[0] / batch_size) / 18),  #344,
    epochs=100,
    verbose=2,
    callbacks=callbacks,
def train_model(reload=False):
    """Train the keyword sequence-to-sequence model on video titles.

    With ``reload=True`` the (keyword, title) training pairs are rebuilt from
    the ``models_trainable`` database and cached in ``count.pickle``,
    ``data_src.pickle`` and ``data_dest.pickle`` in the working directory.
    With ``reload=False`` those cache files are read back instead.

    Existing weights at ``path_checkpoint`` are loaded when possible, then the
    model is fitted for 10 epochs with roughly 500 samples held out for
    validation.

    Args:
        reload: Rebuild and re-cache the training data when true; otherwise
            load the cached pickles.
    """
    models_trainable.initialized()

    if reload:
        text_filter = TextFilter()
        keyword_models = models_trainable.Keyword.select().where(
            models_trainable.Keyword.t_type >= 1,
            models_trainable.Keyword.t_type <= 4)
        keywords = [keyword_model.name for keyword_model in keyword_models]

        # One [target, source] pair per keyword found in a cleaned title;
        # titles without any keyword get a single pair with the "none" marker.
        data_array = []
        for video in models_trainable.Video.select():
            text_filter.set_text(video.title)
            text_filter.regex_from_text(r'\[[^)]*\]')
            text_filter.remove_texts_from_text()
            text_filter.remove_pumsas_from_list()
            text_filter.remove_texts_from_text()
            matches = text_filter.get_matches(keywords)
            labels = [mark_none] if len(matches) == 0 else matches
            for label in labels:
                data_array.append(
                    [mark_start + label + mark_end, str(text_filter)])

        f_count = len(keywords)
        print(len(data_array))

        data_src = [pair[1] for pair in data_array]
        data_dest = [pair[0] for pair in data_array]

        # Cache the keyword count and both text lists for later runs.
        with open('count.pickle', 'wb') as handle:
            pickle.dump(len(keyword_models), handle,
                        protocol=pickle.HIGHEST_PROTOCOL)
        with open('data_src.pickle', 'wb') as handle:
            pickle.dump(data_src, handle, protocol=pickle.HIGHEST_PROTOCOL)
        with open('data_dest.pickle', 'wb') as handle:
            pickle.dump(data_dest, handle, protocol=pickle.HIGHEST_PROTOCOL)
    else:
        # Load the data cached by an earlier reload=True run.
        with open('count.pickle', 'rb') as handle:
            f_count = int(pickle.load(handle))
        with open('data_src.pickle', 'rb') as handle:
            data_src = pickle.load(handle)
        with open('data_dest.pickle', 'rb') as handle:
            data_dest = pickle.load(handle)

    tokenizer_src = TokenizerWrap(texts=data_src,
                                  padding='pre',
                                  reverse=True,
                                  num_words=num_words)
    tokenizer_dest = TokenizerWrap(texts=data_dest,
                                   padding='post',
                                   reverse=False,
                                   num_words=int(f_count) + 4)

    tokens_dest = tokenizer_dest.tokens_padded

    # The decoder is trained to predict the next token: its target is its
    # own input shifted by one position.
    encoder_input_data = tokenizer_src.tokens_padded
    decoder_input_data = tokens_dest[:, :-1]
    decoder_output_data = tokens_dest[:, 1:]

    model_train, model_encoder, model_decoder, model_embedding = get_model(
        f_count)

    callbacks = [
        EarlyStopping(monitor='val_loss', patience=3, verbose=1),
        ModelCheckpoint(filepath=path_checkpoint,
                        monitor='val_loss',
                        verbose=1,
                        save_weights_only=True,
                        save_best_only=True),
        TensorBoard(log_dir='./21_logs/',
                    histogram_freq=0,
                    write_graph=False),
    ]

    # Resume from an earlier checkpoint when one can be loaded; otherwise
    # report the problem and train from the current weights.
    try:
        model_train.load_weights(path_checkpoint)
    except Exception as error:
        print("Error trying to load checkpoint.")
        print(error)

    x_data = {
        'encoder_input': encoder_input_data,
        'decoder_input': decoder_input_data,
    }
    y_data = {'decoder_output': decoder_output_data}

    # Hold out about 500 samples, expressed as a fraction of the data set.
    validation_split = 500 / len(encoder_input_data)

    model_train.fit(x=x_data,
                    y=y_data,
                    batch_size=120,
                    epochs=10,
                    validation_split=validation_split,
                    callbacks=callbacks)
validation_steps = math.ceil( img_itr_validation.samples/batch_size ) # In[36]: from tensorflow.python.keras.callbacks import ModelCheckpoint,CSVLogger cp_filepath = os.path.join(dir_weights,'ep_{epoch:02d}_ls_{loss:.1f}.h5') cp = ModelCheckpoint( cp_filepath, monitor='loss', verbose=0, save_beast_only=False, save_weights_only=True, model='auto', #重みを5エポックで保存 period=5 ) csv_filepath = os.path.join(model_dir,'loss.csv') csv = CSVLogger(csv_filepath,append=True) # In[37]: #モデル学習 history = model.fit_generator( img_itr_train,
def toxicity_prediction_weighted(processed_data,target_index):
    '''
    Manage the training process for the class-weighted CNN model.

    Runs a stratified 5-fold cross validation for one target. In every fold a
    new model is built, trained with class weights that compensate for the
    toxic / non-toxic imbalance, and evaluated on the held-out fold.

    Args:
        processed_data: data set handed to ``data_prep_1``.
        target_index: index of the target to train for; also used in the
            name of the checkpoint file.

    Returns:
        ``(cvscores, cvmodel_history)`` where ``cvscores`` maps ``'acc'``,
        ``'recall'`` and ``'fbeta'`` to per-fold percentages and
        ``cvmodel_history`` holds the per-fold Keras history objects.
        ``None`` if anything fails during preparation or training; the
        traceback is printed in that case.
    '''
    import traceback

    img_rows, img_cols = max_row_size, len(alphabet)
    try:
        print("toxcity prediction for target %d" % target_index)
        print('data preparation ...')
        # dataset for the corresponding target
        x_f,y_f = data_prep_1(processed_data,target_index,max_row_size)

        # define 5-fold cross validation test harness
        kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=seed)
        cvscores = {'acc':[],'recall':[],'fbeta':[]}
        cvmodel_history = []
        for train, test in kfold.split(x_f,y_f):
            # reshape the flat rows into single-channel images
            train_X = x_f.iloc[train,:].values.reshape(len(y_f.iloc[train]), img_rows, img_cols, 1)
            train_y = y_f.iloc[train]

            val_X = x_f.iloc[test,:].values.reshape(len(y_f.iloc[test]), img_rows, img_cols, 1)
            val_y = y_f.iloc[test]

            # the imbalance of the dataset
            n_non_tox_samples = len(train_y[train_y==0])
            n_tox_samples = len(train_y[train_y==1])

            # class weights: the minority class is up-weighted by the
            # imbalance ratio. A fold without toxic samples raises
            # ZeroDivisionError here, which is reported below.
            class_weights={
                1: n_non_tox_samples / n_tox_samples , # toxic, minor class
                0: 1 # non-toxic, major class
            }
            print(class_weights)

            # early stopping: number of epochs without improvement after
            # which training stops
            pat = 5
            early_stopping = EarlyStopping(monitor='val_loss', patience=pat, verbose=1)

            # model checkpoint: keeps the best model as a file on disk. The
            # file name does not depend on the fold, so every fold writes to
            # the same file.
            model_checkpoint = ModelCheckpoint('target'+str(target_index)+'_model_weighted.h5', verbose=1, save_best_only=True)

            # establish the CNN model
            my_model = build_model(img_rows, img_cols)

            print('Training the model ...')
            # train on the training fold, 20% of it held out for validation
            history = my_model.fit(train_X, train_y, batch_size= nb_batch_size,
                                   epochs=nb_epoch,callbacks=[early_stopping, model_checkpoint],
                                   validation_split = 0.2, class_weight=class_weights)
            # plot_trainning(history,'weighted')
            cvmodel_history.append(history)

            # evaluate the model on the held-out fold
            scores = my_model.evaluate(val_X, val_y, verbose=0)
            print("%s: %.2f%%" % (my_model.metrics_names[1], scores[1]*100))
            cvscores['acc'].append(scores[1] * 100)

            val_predictions = my_model.predict(val_X)
            val_pred_round = [round(i[0]) for i in val_predictions] # 0.5 threshold
            [recall,fbeta] = performance(val_y, val_pred_round,1.5)
            cvscores['recall'].append(recall * 100)
            cvscores['fbeta'].append(fbeta * 100)

        return cvscores,cvmodel_history
    except Exception:
        # Best effort: callers get None on failure, but the cause is made
        # visible, and KeyboardInterrupt / SystemExit are no longer swallowed.
        traceback.print_exc()
        return None
# Build the image model and the caption decoder that consumes its
# transfer-layer output.
model, transfer_layer_output = VGGModel()
decoder_model = decoder(state_size, embedding_size, num_words, transfer_layer_output)

optimizer = RMSprop(lr=1e-3)
# Target tokens are fed through an explicit int32 placeholder of shape
# (None, None).
decoder_target = tf.placeholder(dtype='int32', shape=(None, None))
decoder_model.compile(optimizer=optimizer, loss=sparse_cross_entropy, target_tensors=[decoder_target])

# The image signatures are loaded from disk; the two lines that would
# recompute them are disabled.
#transfer_values = get_imgsignature(model,Images,num_images)
#transfer_values = np.array(transfer_values).reshape(num_images,4096)
transfer_values = np.load("new.npz.npy")

path_checkpoint = '22.test.keras'
callback_checkpoint = ModelCheckpoint(filepath=path_checkpoint, verbose=1, save_weights_only=True)
callback_tensorboard = TensorBoard(log_dir='./22_logs/', histogram_freq=0, write_graph=False)
callbacks = [callback_checkpoint, callback_tensorboard]

generator = batch_generator(batch_size)
# Training is disabled; previously saved weights are loaded instead, so
# `callbacks` and `generator` are unused in the visible code.
#decoder_model.fit_generator(generator=generator,steps_per_epoch=steps_per_epoch,epochs=50,callbacks=callbacks)
decoder_model.load_weights('savedweights.hdf5')

# Generate captions for a range of images.
j = 500
cps = []
imids = []
while j < 600:
    # NOTE(review): `_` is not assigned anywhere in the visible code -
    # presumably the loop counter `j` was intended as the index; confirm.
    # NOTE(review): no increment of `j` is visible in this excerpt - verify
    # the loop terminates.
    aa = 'Flicker8k_Dataset/' + list(imagesdict.keys())[_] + '.jpg'
    bb = generate_caption(model, aa, wordtoint, max_tokens=30)
    cps.append(bb)
# Register a TensorFlow session built from the externally prepared `config`
# as the Keras backend session.
session = tf.Session(config=config)
KTF.set_session(session)
# mlp.compile(optimizer=Adam(learning_rate=lr),
mlp.compile(optimizer=Adam(lr=lr),
            loss=my_loss,
            metrics=[my_metric],
            )
# Optional warm start from a previously trained model (disabled):
# file_name_trained = '/home/yezhizi/Documents/TianchiMetro/code/ckpt/lr: 0.0001-batch_size: 93312-l2_param: 0.01-dropout: 0.8-training_epochs: 10000-n_inputs: 239-n_outputs: 3-n_hidden: []-n_mlp: [400, 400, 400]-n_samples2146176-weights[10000, 100, 1]/mlp-ep7600-loss88.766-val_loss98.917-lr: 0.0001-batch_size: 93312-l2_param: 0.01-dropout: 0.8-training_epochs: 10000-n_inputs: 239-n_outputs: 3-n_hidden: []-n_mlp: [400, 400, 400]-n_samples2146176-weights[10000, 100, 1].h5'
# mlp.load_weights(file_name_trained)
# NOTE(review): the original indentation of this excerpt is lost; the
# statements after fit_generator are assumed to belong to the TRAIN branch -
# confirm against the original file.
if mode == constants.TRAIN:
    # Number of batches needed to cover all samples once, rounded up.
    steps_per_epoch = int(np.ceil(n_samples / batch_size))
    # Keep only the best full model according to 'val_my_metric'.
    check_point = ModelCheckpoint(file_name,
                                  monitor='val_my_metric',
                                  verbose=0,
                                  save_best_only=True,
                                  save_weights_only=False,
                                  mode='auto',
                                  period=1)
    # TensorBoard logs go to logs/<file_name without its last 3 characters>/.
    tensor_board = TensorBoard(log_dir='logs/' + file_name[:-3] + '/',
                               histogram_freq=0,
                               write_graph=True,
                               write_images=False)
    result = mlp.fit_generator(generator=generator(steps_per_epoch),
                               steps_per_epoch=steps_per_epoch,
                               epochs=training_epochs,
                               shuffle=shuffle_samples,
                               validation_data=(valid_x, valid_y),
                               verbose=2,
                               callbacks=[check_point, tensor_board])
    print("*************************Finish the softmax output layer training*****************************")
    # saver.save(sess, 'ckpt/sae.ckpt', global_step=epoch)
    # pred = mlp.predict(h
    # print(np.mean(np.abs(pred-train_y[-DATA_A_DAY:])))
    mae = validate(mlp)
def train_model(model, train_data, val_data, steps_per_epoch=None,
                validation_steps=None, tensorboard=True, checkpoint=True,
                model_name='resnet', save_model=True, epochs=20):
    """
    Trains a keras model given a train and validation datasets.

    Training stops early when either the validation loss or the
    ``val_hit1`` metric has not improved for 2 epochs. When ``checkpoint``
    is true the model is saved after every epoch (not only the best one) to
    ``model-<epoch>-<val_loss>.h5``. When model contains Lambda layers,
    checkpoint should be false.

    Parameters
    ----------
    model: Model
        A keras model instance to train data on
    train_data: TFRecordDataset
        A dataset contains training data
    val_data: TFRecordDataset
        A dataset contains validation data
    steps_per_epoch: int
        Number of steps required to complete one training part.
    validation_steps: int
        Number of steps required to complete validation part.
    tensorboard: bool
        Whether to output tensorboard logs (written to ``./logs/``).
        Default is true
    checkpoint: bool
        If true use checkpoint callback to store model
    model_name: str
        Model name in str format. Used when saving the model. Names starting
        with ``'moe'`` are saved as weights only
        (``<model_name>_weights.h5``); all others as a full model without
        optimizer state (``<model_name>.h5``).
    save_model: bool
        Whether to save the model or not.
    epochs: int
        Maximum number of training epochs. Default is 20.

    Returns
    -------
    history:
        Keras history object contains training history data
    """
    loss_stop = EarlyStopping(patience=2)
    acc_stop = EarlyStopping(monitor='val_hit1', patience=2, mode='max')
    callbacks = [loss_stop, acc_stop]

    if checkpoint:
        # A separate name keeps the boolean `checkpoint` flag intact.
        checkpoint_path = "model-{epoch:02d}-{val_loss:.4f}.h5"
        callbacks.append(ModelCheckpoint(checkpoint_path, verbose=1))

    if tensorboard:
        callbacks.append(TensorBoard(log_dir='./logs/'))

    history = model.fit(x=train_data,
                        steps_per_epoch=steps_per_epoch,
                        epochs=epochs,
                        validation_data=val_data,
                        validation_steps=validation_steps,
                        verbose=1,
                        callbacks=callbacks)

    if save_model:
        if model_name.startswith('moe'):
            model.save_weights(model_name + '_weights.h5')
        else:
            model.save(model_name + '.h5', include_optimizer=False)

    return history
model.add(tf.keras.layers.BatchNormalization(name='batch_norm2'))

model.add(tf.keras.layers.Conv2D(32, 3, activation='relu', name='conv3'))
model.add(tf.keras.layers.MaxPooling2D((2, 2), name='pool3'))

model.add(tf.keras.layers.Conv2D(64, 3, activation='relu', name='conv4'))
model.add(tf.keras.layers.BatchNormalization(name='batch_norm4'))
model.add(tf.keras.layers.MaxPooling2D((2, 2), name='pool4'))

# Classifier head: flatten, one hidden dense layer, single sigmoid output.
model.add(tf.keras.layers.Flatten(name='flatten'))
model.add(tf.keras.layers.Dense(100, activation='relu', name='dense1'))
model.add(tf.keras.layers.Dense(1, activation='sigmoid', name='output'))

model.summary()

# Step counts must be whole numbers. -(-a // b) is integer ceiling division,
# so a final partial batch of 16 still counts as one step.
num_steps_per_epoch = -(-SIZE // 16)
val_num_steps_per_epoch = -(-VAL_SIZE // 16)

# A checkpoint is written after every epoch; ./neww must already exist.
# NOTE(review): the file name uses {val_acc}, but the model is compiled with
# metrics=['accuracy']; depending on the Keras version the logged key is
# 'val_accuracy' instead - verify the placeholder matches the logged metric.
callbacks_list = [
    ModelCheckpoint(filepath='./neww/model.{epoch:02d}-{val_acc:.2f}.hdf5')
]

model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

history = model.fit_generator(train_generator,
                              steps_per_epoch=num_steps_per_epoch,
                              epochs=10,
                              validation_data=validation_generator,
                              validation_steps=val_num_steps_per_epoch,
                              callbacks=callbacks_list,
                              verbose=1)

tf.keras.models.save_model(model, './neww/model.h5')
class_mode='categorical')

# Validation images are read from ../Datasets/face/val with the same target
# size, batch size and categorical labels.
test_set = test_datagen.flow_from_directory('../Datasets/face/val',
                                            target_size=IMAGE_SIZE,
                                            batch_size=batch_size,
                                            class_mode='categorical')

# Disabled earlier training call, kept as a bare string literal.
'''r=model.fit_generator(training_set, samples_per_epoch = 8000, nb_epoch = 5, validation_data = test_set, nb_val_samples = 2000)'''

# fit the model
# Stop after 10 epochs without val_loss improvement and roll back to the
# best weights seen.
early_stop = EarlyStopping(monitor='val_loss',
                           min_delta=0,
                           patience=10,
                           verbose=1,
                           restore_best_weights=True)
# Keep only the model with the lowest val_loss.
# NOTE(review): the path uses backslash separators, so it only resolves as a
# directory path on Windows - confirm the target platform.
check_point = ModelCheckpoint(r"..\models\{}".format('face_low_val_loss.h5'),
                              monitor='val_loss',
                              mode='min',
                              save_best_only=True,
                              verbose=1)
callbacks = [early_stop, check_point]

model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy']
)

# One epoch is one full pass over each iterator (len() of the iterator is
# used as the step count).
history = model.fit(
    training_set,
    steps_per_epoch=len(training_set),
    epochs=50,
    validation_data=test_set,
    validation_steps=len(test_set),
vertical_flip=False,
depth_flip=False,
min_max_normalization=True,
scale_range=0.1,
scale_constant_range=0.2
)

# Training and validation batches both come from the same `datagen`.
train_generator = datagen.flow(x_train, y_train, batch_size)
validation_generator = datagen.flow(x_validation, y_validation, batch_size)

# One TensorBoard log directory per run, named after the start time.
# NOTE(review): the timestamp contains ':' which is not a valid path
# character on Windows - confirm the target platform.
now = datetime.now()
logdir = base_path + "/data/tf-logs/" + now.strftime("%B-%d-%Y-%I:%M%p") + "/"

tboard = TensorBoard(log_dir=logdir, histogram_freq=0, write_graph=True, write_images=False)

# Three checkpoints: the latest model (overwritten every epoch), a numbered
# snapshot every 5 epochs, and the best model so far.
current_checkpoint = ModelCheckpoint(filepath=base_path + '/data/model-weights/latest_model.hdf5', verbose=1)
period_checkpoint = ModelCheckpoint(base_path + '/data/model-weights/weights{epoch:03d}.hdf5', period=5)
best_weight_checkpoint = ModelCheckpoint(filepath=base_path + '/data/model-weights/best_weights_checkpoint.hdf5',
                                         verbose=1, save_best_only=True)

# Use up to 32 threads both within and across TensorFlow ops.
conf = tensorflow.ConfigProto(intra_op_parallelism_threads=32, inter_op_parallelism_threads=32)
K.set_session(tensorflow.Session(config=conf))

weights_path = base_path + "/data/model-weights/trailmap_model.hdf5"

model = get_net()
# Loading the current best weights (transfer learning) is disabled below, so
# the model trains from scratch; re-enable the line to start from
# `weights_path`.
# model.load_weights(weights_path)

model.fit_generator(train_generator,
def main(_):
    """Train or evaluate the classification model configured through FLAGS.

    The function reads every hyperparameter from ``FLAGS``, recreates the
    output folder, loads the annotations via ``load_Bengali``, optionally
    splits them into train/validation folds, builds the model and the data
    sequences, and finally calls ``model.fit`` either for a single one-step
    epoch (``evaluation_only``) or for the full training schedule.

    Args:
        _: Unused positional argument (presumably the leftover argv handed
            over by the application runner -- confirm against the caller).
    """
    print("Getting hyperparameters ...")
    print("Using command {}".format(" ".join(sys.argv)))
    # Dump every flag in alphabetical order so that runs are reproducible
    # from the log alone.
    for flag_name, flag_value in sorted(FLAGS.flag_values_dict().items()):
        print(flag_name, flag_value)

    dataset_name = FLAGS.dataset_name
    backbone_model_name = FLAGS.backbone_model_name
    freeze_backbone_model = FLAGS.freeze_backbone_model
    image_height, image_width = FLAGS.image_height, FLAGS.image_width
    input_shape = (image_height, image_width, 3)
    use_manual_manipulation = FLAGS.use_manual_manipulation
    use_batchnormalization = FLAGS.use_batchnormalization
    dropout_rate = FLAGS.dropout_rate
    kernel_regularization_factor = FLAGS.kernel_regularization_factor
    bias_regularization_factor = FLAGS.bias_regularization_factor
    gamma_regularization_factor = FLAGS.gamma_regularization_factor
    beta_regularization_factor = FLAGS.beta_regularization_factor
    fold_num, fold_index = FLAGS.fold_num, FLAGS.fold_index
    # With fewer than two folds there is nothing to hold out for validation.
    use_validation = fold_num >= 2
    evaluate_validation_every_N_epochs = (
        FLAGS.evaluate_validation_every_N_epochs)
    batch_size = FLAGS.batch_size
    learning_rate_mode = FLAGS.learning_rate_mode
    learning_rate_start = FLAGS.learning_rate_start
    learning_rate_end = FLAGS.learning_rate_end
    learning_rate_base = FLAGS.learning_rate_base
    learning_rate_warmup_epochs = FLAGS.learning_rate_warmup_epochs
    learning_rate_steady_epochs = FLAGS.learning_rate_steady_epochs
    learning_rate_drop_factor = FLAGS.learning_rate_drop_factor
    learning_rate_lower_bound = FLAGS.learning_rate_lower_bound
    steps_per_epoch = FLAGS.steps_per_epoch
    epoch_num = FLAGS.epoch_num
    workers = FLAGS.workers
    use_multiprocessing = workers > 1
    cutmix_probability = FLAGS.cutmix_probability
    mixup_probability = FLAGS.mixup_probability
    image_augmentor_name = FLAGS.image_augmentor_name
    use_data_augmentation_in_training = (
        FLAGS.use_data_augmentation_in_training)
    use_data_augmentation_in_evaluation = (
        FLAGS.use_data_augmentation_in_evaluation)
    use_label_smoothing_in_training = FLAGS.use_label_smoothing_in_training
    use_label_smoothing_in_evaluation = (
        FLAGS.use_label_smoothing_in_evaluation)
    evaluation_only = FLAGS.evaluation_only
    pretrained_model_file_path = FLAGS.pretrained_model_file_path

    # One sub-folder per (backbone, resolution, fold setup) combination.
    output_folder_path = os.path.join(
        FLAGS.output_folder_path,
        "{}_{}x{}_{}_{}".format(backbone_model_name, input_shape[0],
                                input_shape[1], fold_num, fold_index))
    # Announce the destructive step before performing it: any previous
    # content of this folder is removed.
    print("Recreating the output folder at {} ...".format(output_folder_path))
    shutil.rmtree(output_folder_path, ignore_errors=True)
    os.makedirs(output_folder_path)

    print("Loading the annotations of the {} dataset ...".format(dataset_name))
    (train_and_valid_accumulated_info_dataframe,
     train_and_valid_attribute_name_to_label_encoder_dict) = load_Bengali()

    if use_validation:
        print("Using customized cross validation splits ...")
        train_and_valid_grapheme_array = (
            train_and_valid_accumulated_info_dataframe["grapheme"].values)
        train_indexes, valid_indexes = apply_cross_validation(
            y=train_and_valid_grapheme_array,
            fold_num=fold_num,
            fold_index=fold_index)
        train_accumulated_info_dataframe = (
            train_and_valid_accumulated_info_dataframe.iloc[train_indexes])
        valid_accumulated_info_dataframe = (
            train_and_valid_accumulated_info_dataframe.iloc[valid_indexes])
    else:
        # No hold-out split: train on everything, skip validation.
        train_accumulated_info_dataframe = (
            train_and_valid_accumulated_info_dataframe)
        valid_accumulated_info_dataframe = None

    print("Initiating the model ...")
    model, preprocess_input = init_model(
        backbone_model_name, freeze_backbone_model, input_shape,
        train_and_valid_attribute_name_to_label_encoder_dict,
        use_batchnormalization, dropout_rate, kernel_regularization_factor,
        bias_regularization_factor, gamma_regularization_factor,
        beta_regularization_factor, evaluation_only,
        pretrained_model_file_path)
    # Plotting the architecture is best effort; a failure here is only
    # reported and must not abort the run.
    try:
        plot_model(model,
                   show_shapes=True,
                   show_layer_names=True,
                   to_file=os.path.join(output_folder_path, "model.png"))
    except Exception as exception:  # pylint: disable=broad-except
        print(exception)

    print("Initiating the image augmentor {} ...".format(image_augmentor_name))
    image_augmentor = getattr(image_augmentors_wrapper, image_augmentor_name)()
    image_augmentor.compose_transforms()

    print("Perform training ...")
    # Two independent sequences over the same training data feed the
    # CutMix/MixUp wrapper.
    train_generator_alpha = VanillaDataSequence(
        train_accumulated_info_dataframe,
        train_and_valid_attribute_name_to_label_encoder_dict, input_shape,
        use_manual_manipulation, batch_size, steps_per_epoch)
    train_generator_beta = VanillaDataSequence(
        train_accumulated_info_dataframe,
        train_and_valid_attribute_name_to_label_encoder_dict, input_shape,
        use_manual_manipulation, batch_size, steps_per_epoch)
    train_generator = CutMixAndMixUpDataSequence(
        datasequence_instance_alpha=train_generator_alpha,
        datasequence_instance_beta=train_generator_beta,
        cutmix_probability=cutmix_probability,
        mixup_probability=mixup_probability)
    train_generator = PreprocessingDataSequence(
        train_generator,
        preprocess_input,
        image_augmentor,
        use_data_augmentation_in_training,
        use_label_smoothing_in_training,
    )

    optimal_model_file_path = os.path.join(output_folder_path, "model.h5")
    valid_generator = None
    if use_validation:
        valid_generator = VanillaDataSequence(
            valid_accumulated_info_dataframe,
            train_and_valid_attribute_name_to_label_encoder_dict, input_shape,
            use_manual_manipulation, batch_size,
            len(valid_accumulated_info_dataframe) // batch_size)
        valid_generator = PreprocessingDataSequence(
            valid_generator,
            preprocess_input,
            image_augmentor,
            use_data_augmentation_in_evaluation,
            use_label_smoothing_in_evaluation,
        )

    modelcheckpoint_callback = ModelCheckpoint(
        filepath=optimal_model_file_path,
        save_best_only=False,
        save_weights_only=False,
        period=evaluate_validation_every_N_epochs,
        verbose=1)
    learningratescheduler_callback = LearningRateScheduler(
        schedule=lambda epoch_index: learning_rate_scheduler(
            epoch_index, epoch_num, learning_rate_mode, learning_rate_start,
            learning_rate_end, learning_rate_base,
            learning_rate_warmup_epochs, learning_rate_steady_epochs,
            learning_rate_drop_factor, learning_rate_lower_bound),
        verbose=1)
    historylogger_callback = HistoryLogger(output_folder_path)

    if evaluation_only:
        # A single one-step epoch is run only to trigger the validation pass.
        # validation_freq has to be 1 here: Keras validates only on epochs
        # whose 1-based index is divisible by validation_freq, so reusing
        # evaluate_validation_every_N_epochs (> 1) together with epochs=1
        # would silently skip the evaluation altogether.
        model.fit(x=train_generator,
                  steps_per_epoch=1,
                  validation_data=valid_generator,
                  validation_freq=1,
                  callbacks=[historylogger_callback],
                  epochs=1,
                  workers=workers,
                  use_multiprocessing=use_multiprocessing,
                  verbose=2)
    else:
        model.fit(x=train_generator,
                  steps_per_epoch=steps_per_epoch,
                  validation_data=valid_generator,
                  validation_freq=evaluate_validation_every_N_epochs,
                  callbacks=[
                      modelcheckpoint_callback,
                      learningratescheduler_callback,
                      historylogger_callback,
                  ],
                  epochs=epoch_num,
                  workers=workers,
                  use_multiprocessing=use_multiprocessing,
                  verbose=2)

    print("All done!")