def __init__(self, model, out_file, batch_size, train_gen, n_samples,
             patience_early_stop=3, patience_lr_reduce=2, log_dir='./logs',
             stop_on_nan=True, lr_schedule=None, lr_reduce=-1, log_csv=True,
             initial_epoch=0, epochs=100, callbacks=None, valid_gen=None):
    self.valid_gen = valid_gen
    self.train_gen = train_gen
    self.n_samples = n_samples
    self.model = model
    self.patience_lr_reduce = patience_lr_reduce
    self.initial_epoch = initial_epoch
    self.epochs = epochs
    self.batch_size = batch_size
    self.callbacks = []

    if patience_early_stop > -1:
        early_stop = EarlyStopping(monitor='val_loss', min_delta=0.001,
                                   patience=patience_early_stop, mode='min',
                                   verbose=1)
        self.callbacks.append(early_stop)

    if out_file is not None:
        checkpoint = ModelCheckpoint(log_dir + out_file, monitor='loss',
                                     verbose=2, save_best_only=True, mode='min',
                                     save_weights_only=False, period=1)
        self.callbacks.append(checkpoint)

    if log_dir is not None:
        tensorboard = TensorBoard(batch_size=batch_size, log_dir=log_dir,
                                  write_images=True, histogram_freq=0)
        self.callbacks.append(tensorboard)

    if stop_on_nan:
        stop_nan = TerminateOnNaN()
        self.callbacks.append(stop_nan)

    if lr_schedule is not None:
        schedule = LearningRateScheduler(schedule=lr_schedule)
        self.callbacks.append(schedule)

    if lr_reduce > -1:
        reducer = ReduceLROnPlateau(monitor='loss', factor=lr_reduce,
                                    patience=patience_lr_reduce, min_lr=0.00001)
        self.callbacks.append(reducer)

    if log_csv:
        log_file_name = log_dir + '/log.csv'
        append = Path(log_file_name).is_file() and initial_epoch > 0
        csv_logger = CSVLogger(log_file_name, append=append)
        self.callbacks.append(csv_logger)

    if callbacks is not None:
        self.callbacks.extend(callbacks)

    history = History()
    self.callbacks.append(history)
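# A minimal sketch (not taken from the original class) of how a `fit` method
# might consume the callback list assembled above; it assumes `train_gen` and
# `valid_gen` are keras.utils.Sequence objects so Keras can infer the
# validation step count on its own.
def fit(self):
    """Hypothetical helper: run training with the callbacks built in __init__."""
    return self.model.fit_generator(
        generator=self.train_gen,
        steps_per_epoch=self.n_samples // self.batch_size,
        epochs=self.epochs,
        initial_epoch=self.initial_epoch,
        validation_data=self.valid_gen,
        callbacks=self.callbacks,
        verbose=1)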
def fit(self, X, y): self.model_name = get_name_from_model(self.model) X_fit = X if self.model_name[:12] == 'DeepLearning' or self.model_name in [ 'BayesianRidge', 'LassoLars', 'OrthogonalMatchingPursuit', 'ARDRegression', 'Perceptron', 'PassiveAggressiveClassifier', 'SGDClassifier', 'RidgeClassifier', 'LogisticRegression', 'XGBClassifier', 'XGBRegressor' ]: if self.model_name[:3] == 'XGB' and scipy_sparse.issparse(X): ones = [[1] for _ in range(X.shape[0])] # Trying to force XGBoost to play nice with sparse matrices X_fit = scipy_sparse.hstack((X, ones)) elif scipy_sparse.issparse(X_fit): X_fit = X_fit.todense() if self.model_name[:12] == 'DeepLearning': if keras_installed is False: # Suppress some level of logs os.environ['TF_CPP_MIN_VLOG_LEVEL'] = '3' os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' # For Keras, we need to tell it how many input nodes to expect, which is our # num_cols num_cols = X_fit.shape[1] model_params = self.model.get_params() del model_params['build_fn'] try: del model_params['feature_learning'] except: # TODO: Fix bare Except pass try: del model_params['num_cols'] except: # TODO: Fix bare Except pass if self.type_of_estimator == 'regressor': self.model = KerasRegressor( build_fn=utils_models.make_deep_learning_model, num_cols=num_cols, feature_learning=self.feature_learning, **model_params) elif self.type_of_estimator == 'classifier': self.model = KerasClassifier( build_fn=utils_models.make_deep_learning_classifier, num_cols=num_cols, feature_learning=self.feature_learning, **model_params) if self.model_name[:12] == 'DeepLearning': try: if self.is_hp_search is True: patience = 5 verbose = 0 else: patience = 25 verbose = 2 X_fit, y, X_test, y_test = self.get_X_test(X_fit, y) if isinstance(X_test, pd.DataFrame): X_test = X_test.values else: # TODO: Try to improve flow control try: X_test = X_test.toarray() except AttributeError: pass if not self.is_hp_search: print( '\nWe will stop training early if we have not seen an improvement in ' 'validation accuracy in {} epochs '.format(patience)) print( 'To measure validation accuracy, we will split off a random 10 percent of ' 'your training data set ') early_stopping = EarlyStopping(monitor='val_loss', patience=patience, verbose=verbose) terminate_on_nan = TerminateOnNaN() now_time = datetime.datetime.now() time_string = str(now_time.year) + '_' + str( now_time.month) + '_' + str(now_time.day) + '_' + str( now_time.hour) + '_' + str(now_time.minute) temp_file_name = 'tmp_dl_model_checkpoint_' + time_string + str( random.random()) + '.h5' model_checkpoint = ModelCheckpoint(temp_file_name, monitor='val_loss', save_best_only=True, mode='min', period=1) callbacks = [early_stopping, terminate_on_nan] if not self.is_hp_search: callbacks.append(model_checkpoint) self.model.fit(X_fit, y, callbacks=callbacks, validation_data=(X_test, y_test), verbose=verbose) # TODO: give some kind of logging on how the model did here! best epoch, # best accuracy, etc. 
if self.is_hp_search is False: self.model = keras_load_model(temp_file_name) # TODO: Try to improve flow control try: os.remove(temp_file_name) except OSError: pass except KeyboardInterrupt: print( 'Stopping training at this point because we heard a KeyboardInterrupt' ) print( 'If the deep learning model is functional at this point, we will output the ' 'model in its latest form ') print( 'Note that this feature is an unofficial beta-release feature that is known ' 'to fail on occasion ') if self.is_hp_search is False: # TODO: Make sure that temp_file_name is initialized self.model = keras_load_model(temp_file_name) # TODO: Try to improve flow control try: os.remove(temp_file_name) except OSError: pass elif self.model_name[:4] == 'LGBM': if scipy_sparse.issparse(X_fit): X_fit = X_fit.toarray() verbose = True if self.is_hp_search is True: verbose = False train_dynamic_n_estimators = False if self.model.get_params()['n_estimators'] == 2000: train_dynamic_n_estimators = True X_fit, y, X_test, y_test = self.get_X_test(X_fit, y) # TODO: Try to improve flow control try: X_test = X_test.toarray() except AttributeError: pass if self.X_test is not None: eval_name = 'X_test_the_user_passed_in' else: eval_name = 'random_holdout_set_from_training_data' if self.type_of_estimator == 'regressor': if self.training_prediction_intervals is True: eval_metric = 'quantile' else: eval_metric = 'rmse' elif self.type_of_estimator == 'classifier': if len(set(y_test)) > 2: eval_metric = 'multi_logloss' else: eval_metric = 'binary_logloss' cat_feature_indices = self.get_categorical_feature_indices() if self.memory_optimized is True: X_fit.to_csv('_lgbm_dataset.csv') del X_fit if cat_feature_indices is None: if train_dynamic_n_estimators: self.model.fit( X_fit, y, # TODO: Make sure that these are initialized eval_set=[(X_test, y_test)], early_stopping_rounds=100, # TODO: Make sure that eval_metric and eval_names are initialized eval_metric=eval_metric, eval_names=[eval_name], verbose=verbose) else: self.model.fit(X_fit, y, verbose=verbose) else: if train_dynamic_n_estimators: self.model.fit( X_fit, y, # TODO: Similar to above; make sure that all of these are initialized eval_set=[(X_test, y_test)], early_stopping_rounds=100, eval_metric=eval_metric, eval_names=[eval_name], categorical_feature=cat_feature_indices, verbose=verbose) else: self.model.fit(X_fit, y, categorical_feature=cat_feature_indices, verbose=verbose) elif self.model_name[:8] == 'CatBoost': if isinstance(X_fit, pd.DataFrame): X_fit = X_fit.values else: X_fit = X_fit.toarray() if self.type_of_estimator == 'classifier' and len( pd.Series(y).unique()) > 2: # TODO: we might have to modify the format of the y values, converting them all # to ints, then back again (sklearn has a useful inverse_transform on some # preprocessing classes) self.model.set_params(loss_function='MultiClass') cat_feature_indices = self.get_categorical_feature_indices() self.model.fit(X_fit, y, cat_features=cat_feature_indices) elif self.model_name[:16] == 'GradientBoosting': if not sklearn_version > '0.18.1': if isinstance(X_fit, pd.DataFrame): X_fit = X_fit.values else: X_fit = X_fit.toarray() patience = 20 best_val_loss = -10000000000 num_worse_rounds = 0 best_model = deepcopy(self.model) X_fit, y, X_test, y_test = self.get_X_test(X_fit, y) # Add a variable number of trees each time, depending how far into the process we are if os.environ.get('is_test_suite', False) == 'True': num_iters = list(range(1, 50, 1)) + list(range( 50, 100, 2)) + list(range(100, 250, 3)) else: num_iters 
= list(range( 1, 50, 1)) + list(range(50, 100, 2)) + list( range(100, 250, 3)) + list(range(250, 500, 5)) + list( range(500, 1000, 10)) + list(range( 1000, 2000, 20)) + list(range( 2000, 10000, 100)) # TODO: get n_estimators from the model itself, and reduce this list to only those # values that come under the value from the model try: for num_iter in num_iters: warm_start = True if num_iter == 1: warm_start = False self.model.set_params(n_estimators=num_iter, warm_start=warm_start) self.model.fit(X_fit, y) if self.training_prediction_intervals is True: val_loss = self.model.score(X_test, y_test) else: try: val_loss = self._scorer.score(self, X_test, y_test) except Exception: # TODO: Fix bare Except val_loss = self.model.score(X_test, y_test) if val_loss - self.min_step_improvement > best_val_loss: best_val_loss = val_loss num_worse_rounds = 0 best_model = deepcopy(self.model) else: num_worse_rounds += 1 print( '[' + str(num_iter) + '] random_holdout_set_from_training_data\'s score is: ' + str(round(val_loss, 3))) if num_worse_rounds >= patience: break except KeyboardInterrupt: print( 'Heard KeyboardInterrupt. Stopping training, and using the best ' 'GradientBoosting model with a checkpoint') self.model = best_model print( 'The number of estimators that were the best for this training dataset: ' + str(self.model.get_params()['n_estimators'])) print('The best score on the holdout set: ' + str(best_val_loss)) else: self.model.fit(X_fit, y) if self.X_test is not None: del self.X_test del self.y_test gc.collect() return self
def __init__(self):
    # Variables to hold the description of the experiment
    self.config_description = "This is the template config file."

    # System dependent variables
    self._workers = 5
    self._multiprocessing = True

    # Variables for comet.ml
    self._project_name = "jpeg_deep"
    self._workspace = "ssd"

    # Network variables
    self._weights = "/dlocal/home/2017018/bdegue01/weights/jpeg_deep/classification_dct/lcrfat_y/classification_dct_jpeg-deep_cwuagngnTzBVTL1DuPtA4lnl5nHKn6hu/checkpoints/epoch-71_loss-0.9257_val_loss-1.7127.h5"
    self._network = SSD300_resnet(backbone="lcrfat_y", dct=True, image_shape=(38, 38))

    # Training variables
    self._epochs = 240
    self._batch_size = 32
    self._steps_per_epoch = 1000
    self.optimizer_parameters = {"lr": 0.001, "momentum": 0.9}
    self._optimizer = SGD(**self.optimizer_parameters)
    self._loss = SSDLoss(neg_pos_ratio=3, alpha=1.0).compute_loss
    self._metrics = None

    dataset_path = environ["DATASET_PATH"]
    images_2007_path = join(dataset_path, "VOC2007/JPEGImages")
    images_2012_path = join(dataset_path, "VOC2012/JPEGImages")
    self.train_sets = [
        (images_2007_path, join(dataset_path, "VOC2007/ImageSets/Main/trainval.txt")),
        (images_2012_path, join(dataset_path, "VOC2012/ImageSets/Main/trainval.txt"))
    ]
    self.validation_sets = [
        (images_2007_path, join(dataset_path, "VOC2007/ImageSets/Main/test.txt"))
    ]
    self.test_sets = [
        (images_2012_path, join(dataset_path, "VOC2012/ImageSets/Main/test.txt"))
    ]

    # Keras stuff
    self.model_checkpoint = None
    self.reduce_lr_on_plateau = ReduceLROnPlateau(patience=5, verbose=1)
    self.terminate_on_nan = TerminateOnNaN()
    self.early_stopping = EarlyStopping(monitor='val_loss', min_delta=0, patience=15)
    self._callbacks = [
        self.reduce_lr_on_plateau, self.early_stopping, self.terminate_on_nan
    ]

    self.input_encoder = SSDInputEncoder()

    self.train_tranformations = [SSDDataAugmentation()]
    self.validation_transformations = [
        ConvertTo3Channels(), Resize(height=300, width=300)
    ]
    self.test_transformations = [
        ConvertTo3Channels(), Resize(height=300, width=300)
    ]

    self._train_generator = None
    self._validation_generator = None
    self._test_generator = None
    self._horovod = None
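# The config above keeps most values in underscore-prefixed attributes, which
# suggests the training script reads them through properties. A hedged sketch
# of what such accessors might look like (names are assumptions, not taken
# from the original file):
@property
def network(self):
    return self._network

@property
def callbacks(self):
    return self._callbacks

@property
def optimizer(self):
    return self._optimizer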
def PGNN_train_test(optimizer_name, optimizer_val, drop_frac, use_YPhy,
                    iteration, n_layers, n_nodes, tr_size, lamda, reg, samp):
    # fix_seeds(ss)

    # Hyper-parameters of the training process
    # batch_size = tr_size
    batch_size = 1
    num_epochs = 300
    val_frac = 0.25
    patience_val = 80

    # Initializing results filename
    exp_name = optimizer_name + '_drop' + str(drop_frac) + '_usePhy' + str(use_YPhy) + \
        '_nL' + str(n_layers) + '_nN' + str(n_nodes) + '_trsize' + str(tr_size) + \
        '_lamda' + str(lamda) + '_iter' + str(iteration)
    exp_name = exp_name.replace('.', 'pt')
    results_dir = '../results/'
    model_name = results_dir + exp_name + '_model.h5'  # storing the trained model
    if reg == True and samp == 25:
        results_name = results_dir + exp_name + '_results_25_regularizer.dat'  # storing the results of the model
    elif reg == False and samp == 25:
        results_name = results_dir + exp_name + '_results_25.dat'  # storing the results of the model
    elif reg == True and samp == 1519:
        results_name = results_dir + exp_name + '_results_1519_regularizer.dat'  # storing the results of the model
    elif reg == False and samp == 1519:
        results_name = results_dir + exp_name + '_results_1519.dat'  # storing the results of the model

    # Load labeled data
    data = np.loadtxt('../data/labeled_data.dat')
    # data = np.loadtxt('../data/labeled_data_BK_constw_unique.dat')
    # data = np.loadtxt('../data/labeled_data_BK_constw_v2.dat')
    # x_labeled = data[:, :2]  # -2 because we do not need porosity predictions
    x_label = data[:, :-3]  # -2 because we do not need porosity predictions
    x_labeled = np.hstack((x_label[:, :2], x_label[:, -2:]))
    y_labeled = data[:, -3:-1]  # dimensionless bond length and porosity measurements

    if samp == 25:
        data = np.loadtxt('../data/unlabeled_data_BK_constw_v2_25.dat')
        x_unlabeled = data[:, :]
    elif samp == 1519:
        data = np.loadtxt('../data/unlabeled_data_BK_constw_v2_1525.dat')
        x_unlabeled = data[:, :]

    x_unlabeled1 = x_unlabeled[:1303, :]
    x_unlabeled2 = x_unlabeled[-6:, :]
    x_unlabeled = np.vstack((x_unlabeled1, x_unlabeled2))

    # initial porosity
    init_poro = x_unlabeled[:, -1]
    x_unlabeled = np.hstack((x_unlabeled[:, :2], x_unlabeled[:, -3:-1]))
    # x_unlabeled = x_unlabeled[:, :2]

    # normalize dataset with MinMaxScaler
    scaler = preprocessing.MinMaxScaler(feature_range=(0.0, 1.0))
    # scaler = preprocessing.StandardScaler()
    x_labeled = scaler.fit_transform(x_labeled)
    # y_labeled = scaler.fit_transform(y_labeled)
    x_unlabeled = scaler.fit_transform(x_unlabeled)

    # # initial porosity & physics outputs are removed
    # x_unlabeled = x_unlabeled[:, :-3]

    # train and test data
    trainX, trainY = x_labeled[:tr_size, :], y_labeled[:tr_size]
    # testX, testY = x_labeled[tr_size:,:], y_labeled[tr_size:]
    testX, testY = x_labeled[tr_size:, :], y_labeled[tr_size:]

    if use_YPhy == 0:
        # Removing the last column from x_unlabeled (corresponding to Y_PHY)
        x_unlabeled = x_unlabeled[:, :-1]

    # Creating the model
    model = Sequential()
    for layer in np.arange(n_layers):
        if layer == 0:
            model.add(Dense(n_nodes, activation='relu',
                            input_shape=(np.shape(trainX)[1],)))
        else:
            if reg:
                model.add(Dense(n_nodes, activation='relu',
                                kernel_regularizer=l1_l2(l1=.001, l2=.001)))
            else:
                model.add(Dense(n_nodes, activation='relu'))
        # model.add(Dropout(rate=drop_frac))
        model.add(MCDropout(rate=drop_frac))
    model.add(Dense(2, activation='linear'))

    # physics-based regularization
    uinp_sc = K.constant(value=x_unlabeled)  # unlabeled input data
    lam1 = K.constant(value=lamda[0])  # regularization hyper-parameter
    lam2 = K.constant(value=lamda[1])  # regularization hyper-parameter
    lam3 = K.constant(value=lamda[2])  # regularization hyper-parameter
    lam4 = K.constant(value=lamda[3])  # regularization hyper-parameter
    predictions = model(uinp_sc)  # model output at depth i
    # porosity = K.relu(predictions[:,1])
    phyloss1 = bond(predictions[:, 0])  # physics loss 1
    # uinp = K.constant(value=x_unlabeled_non)  # unlabeled input data
    phyloss2 = poros(init_poro, predictions[:, 1])  # physics loss 2
    phyloss3 = strength1(predictions[:, 0], predictions[:, 1])
    phyloss4 = strength2(predictions[:, 0], predictions[:, 1])
    totloss = combined_loss([phyloss1, phyloss2, phyloss3, phyloss4, lam1, lam2, lam3, lam4])
    phyloss = phy_loss_mean([phyloss1, phyloss2, phyloss3, phyloss4, lam1, lam2, lam3, lam4])

    model.compile(loss=totloss,
                  optimizer=optimizer_val,
                  metrics=[phyloss, root_mean_squared_error])

    early_stopping = EarlyStopping(monitor='val_loss', patience=patience_val, verbose=1)
    # print('Running...' + optimizer_name)
    history = model.fit(trainX, trainY,
                        batch_size=batch_size,
                        epochs=num_epochs,
                        verbose=0,
                        validation_split=val_frac,
                        callbacks=[early_stopping, TerminateOnNaN()])

    # early_stopping = EarlyStopping(monitor='loss', patience=patience_val, verbose=1)
    # history = model.fit(trainX, trainY,
    #                     batch_size=batch_size,
    #                     epochs=num_epochs,
    #                     verbose=1,
    #                     callbacks=[early_stopping, TerminateOnNaN()])

    # test_score = model.evaluate(testX, testY, verbose=0)
    # predictions = model.predict(x_labeled)  # model output at depth i
    # print(np.sort(predictions[:,0], axis=0))
    # predictions = model.predict(x_unlabeled)  # model output at depth i
    # print(np.sort(predictions[:,0], axis=0))

    # print('iter: ' + str(iteration) + ' useYPhy: ' + str(use_YPhy) +
    #       ' nL: ' + str(n_layers) + ' nN: ' + str(n_nodes) +
    #       ' lamda1: ' + str(lamda[0]) + ' lamda2: ' + str(lamda[1]) + ' trsize: ' + str(tr_size) +
    #       ' TestRMSE: ' + str(test_score[2]) + ' PhyLoss: ' + str(test_score[1]), ' TestLoss: ' + str(test_score[0]), "\n")
    #
    # print('iter: ' + str(iteration) + ' TestRMSE: ' + str(test_score[2]) + ' PhyLoss: ' + str(test_score[1]), "\n")
    #
    # model.save(model_name)
    #
    # save results
    # results = {'train_loss_1': history.history['loss_1'],
    #            'val_loss_1': history.history['val_loss_1'],
    #            'train_rmse': history.history['root_mean_squared_error'],
    #            'val_rmse': history.history['val_root_mean_squared_error'],
    #            'test_rmse': test_score[2],
    #            'PhyLoss': test_score[1]}
    # results = {'train_loss_1': history.history['loss_1'],
    #            'train_rmse': history.history['root_mean_squared_error'],
    #            'test_rmse': test_score[2],
    #            'PhyLoss': test_score[1]}
    # save_obj(results, results_name)
    # predictions = model.predict(testX)
    # return results, results_name, predictions, testY, test_score[2], trainY

    test_score = model.evaluate(testX, testY, verbose=1)
    print(test_score)

    samples = []
    for i in range(int(nsim)):
        print("simulation num:", i)
        predictions = model.predict(Xx)
        predictions = predictions[:, 1]
        samples.append(predictions)

    return np.array(samples)
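# The helpers `root_mean_squared_error`, `combined_loss` and `phy_loss_mean`
# are not defined in this snippet. A minimal sketch of the usual pattern, under
# the assumption that the physics residuals (phyloss1..4) are tensors and the
# closures weight them with the lam constants; the exact residual handling
# depends on the bond/poros/strength helpers, which are also not shown here.
from keras import backend as K

def root_mean_squared_error(y_true, y_pred):
    return K.sqrt(K.mean(K.square(y_pred - y_true)))

def phy_loss_mean(params):
    # Reported as a metric: the weighted mean of the physics residual violations.
    loss1, loss2, loss3, loss4, lam1, lam2, lam3, lam4 = params
    def loss(y_true, y_pred):
        return (lam1 * K.mean(K.relu(loss1)) + lam2 * K.mean(K.relu(loss2)) +
                lam3 * K.mean(K.relu(loss3)) + lam4 * K.mean(K.relu(loss4)))
    return loss

def combined_loss(params):
    # Training loss: empirical MSE plus the weighted physics residuals.
    loss1, loss2, loss3, loss4, lam1, lam2, lam3, lam4 = params
    def loss(y_true, y_pred):
        phy = (lam1 * K.mean(K.relu(loss1)) + lam2 * K.mean(K.relu(loss2)) +
               lam3 * K.mean(K.relu(loss3)) + lam4 * K.mean(K.relu(loss4)))
        return K.mean(K.square(y_pred - y_true)) + phy
    return loss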
# Define a learning rate schedule.
def lr_schedule(epoch):
    if epoch < 80:
        return 0.001
    elif epoch < 100:
        return 0.0001
    else:
        return 0.00001

learning_rate_scheduler = LearningRateScheduler(schedule=lr_schedule, verbose=1)
terminate_on_nan = TerminateOnNaN()
tb_cb = TensorBoard(log_dir="LOG/log7")

callbacks = [
    model_checkpoint, csv_logger, learning_rate_scheduler, terminate_on_nan,
    tb_cb
]

# If you're resuming a previous training, set `initial_epoch` and `final_epoch` accordingly.
initial_epoch = 0
final_epoch = 120
steps_per_epoch = 1000

history = model.fit_generator(generator=train_generator,
                              steps_per_epoch=steps_per_epoch,
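# `model_checkpoint` and `csv_logger` are referenced in the callbacks list of
# the snippet above but not defined in it; a typical definition (the file names
# here are placeholders, not taken from the source) would be:
model_checkpoint = ModelCheckpoint(
    filepath='model_epoch-{epoch:02d}_loss-{loss:.4f}_val_loss-{val_loss:.4f}.h5',
    monitor='val_loss',
    verbose=1,
    save_best_only=True,
    mode='min')
csv_logger = CSVLogger(filename='training_log.csv',
                       separator=',',
                       append=True)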
def PGNN_train_test(optimizer_name, optimizer_val, drop_rate, iteration, n_layers, n_nodes, tr_size, lamda, reg): # Hyper-parameters of the training process # batch_size = int(tr_size/2) batch_size = 1 num_epochs = 300 val_frac = 0.25 patience_val = 80 # Initializing results filename exp_name = optimizer_name + '_drop' + str(drop_rate) + '_nL' + str(n_layers) + '_nN' + str(n_nodes) + '_trsize' + str(tr_size) + '_iter' + str(iteration) exp_name = exp_name.replace('.','pt') results_dir = '../results/' model_name = results_dir + exp_name + '_NoPhyInfomodel.h5' # storing the trained model if reg: results_name = results_dir + exp_name + '_results_regularizer.dat' # storing the results of the model else: results_name = results_dir + exp_name + '_results.dat' # storing the results of the model # Load labeled data data = np.loadtxt('../data/labeled_data.dat') # data = np.loadtxt('../data/labeled_data_BK_constw_unique.dat') # data = np.loadtxt('../data/labeled_data_BK_constw_v2.dat') # x_labeled = data[:, :-5] # -2 because we do not need porosity predictions x_labeled = data[:, :2] # -2 because we do not need porosity predictions y_labeled = data[:, -2:-1] # normalize dataset with MinMaxScaler scaler = preprocessing.MinMaxScaler(feature_range=(0, 1.0)) # scaler = preprocessing.StandardScaler() x_labeled = scaler.fit_transform(x_labeled) # y_labeled = scaler.fit_transform(y_labeled) # train and test data trainX, trainY = x_labeled[:tr_size,:], y_labeled[:tr_size] # testX, testY = x_labeled[tr_size:,:], y_labeled[tr_size:] # init_poro = data[tr_size:, -1] testX, testY = x_labeled[tr_size:,:], y_labeled[tr_size:] init_poro = data[tr_size:, -1] # Creating the model model = Sequential() for layer in np.arange(n_layers): if layer == 0: model.add(Dense(n_nodes, activation='relu', input_shape=(np.shape(trainX)[1],))) else: if reg: model.add(Dense(n_nodes, activation='relu', kernel_regularizer=l1_l2(l1=.001, l2=.001))) else: model.add(Dense(n_nodes, activation='relu')) model.add(Dropout(rate=drop_rate)) model.add(Dense(1, activation='linear')) model.compile(loss='mean_squared_error', optimizer=optimizer_val, metrics=[root_mean_squared_error]) early_stopping = EarlyStopping(monitor='val_loss', patience=patience_val,verbose=1) print('Running...' + optimizer_name) history = model.fit(trainX, trainY, batch_size=batch_size, epochs=num_epochs, verbose=0, validation_split=val_frac, callbacks=[early_stopping, TerminateOnNaN()]) test_score = model.evaluate(testX, testY, verbose=1) print(test_score) samples = [] for i in range(int(nsim)): print("simulation num:",i) predictions = model.predict(Xx) samples.append(predictions[:,np.newaxis]) return np.array(samples)
try:
    init_args = yaml.load(stream)
except yaml.YAMLError as exc:
    print(exc)

checkpointer = ModelCheckpoint(
    filepath="/usr/local/workspace/keras-fcn/voc2011/tmp/fcn_vgg16_weights.h5",
    verbose=1,
    save_best_only=True)
lr_reducer = ReduceLROnPlateau(monitor='val_loss',
                               factor=np.sqrt(0.1),
                               cooldown=0,
                               patience=10,
                               min_lr=1e-12)
early_stopper = EarlyStopping(monitor='val_loss', min_delta=0.001, patience=10)
nan_terminator = TerminateOnNaN()
csv_logger = CSVLogger(
    # 'output/{}_fcn_vgg16.csv'.format(datetime.datetime.now().isoformat()))
    '/usr/local/workspace/keras-fcn/voc2011/output/tmp_fcn_vgg16.csv')
# check_num = CheckNumericsOps(validation_data=[np.random.random((1, 224, 224, 3)), 1],
#                              histogram_freq=100)

datagen = PascalVocGenerator(image_shape=[224, 224, 3],
                             image_resample=True,
                             pixelwise_center=True,
                             pixel_mean=[115.85100, 110.50989, 102.16182],
                             pixelwise_std_normalization=True,
                             pixel_std=[70.30930, 69.41244, 72.60676])
train_loader = ImageSetLoader(**init_args['image_set_loader']['train'])
val_loader = ImageSetLoader(**init_args['image_set_loader']['val'])
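# A plausible continuation (not shown in this excerpt): the callbacks above are
# handed to fit_generator together with generators built from the two loaders.
# The model and generator construction are elided because they depend on
# keras-fcn APIs the excerpt does not show; `fcn_vgg16`, `train_gen`, `val_gen`
# and the step counts below are placeholders.
fcn_vgg16.fit_generator(
    train_gen,
    steps_per_epoch=1000,
    epochs=100,
    validation_data=val_gen,
    validation_steps=100,
    callbacks=[checkpointer, lr_reducer, early_stopper, nan_terminator, csv_logger])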
def main(): corpus = TIMIT(dirpath=TIMIT_DATA_DIR) quantizer = MuLawQuantizer(k=256) gm = AutoregressiveGenerativeModel(quantizer, nbChannels=corpus.nbChannels, nbFilters=64, name='gm') modelPath = os.path.join(CDIR, 'models') if not os.path.exists(modelPath): os.makedirs(modelPath) modelFilename = os.path.join(modelPath, 'gm.h5') logPath = os.path.join( CDIR, 'logs', 'gm', datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')) if not os.path.exists(logPath): os.makedirs(logPath) gm.compile(optimizer=Adam(1e-2), loss='categorical_crossentropy', metrics=[ 'categorical_accuracy', ]) # top_3_categorical_accuracy, # top_5_categorical_accuracy, # top_10_categorical_accuracy]) print(gm.summary()) # NOTE: memory requirement for processing speech on the raw waveform is too high to have a large batch size batchSize = 8 trainData = corpus.trainData() trainGenerator = AutoregressiveGenerativeModelDataGenerator( gm, trainData, corpus.fs, batchSize) logger.info('Number of audio samples in training set: %d' % (len(trainData))) testData = corpus.testData() testGenerator = AutoregressiveGenerativeModelDataGenerator( gm, testData, corpus.fs, batchSize) logger.info('Number of audio samples in test set: %d' % (len(testData))) callbacks = [] tensorboard = TensorBoard(logPath, histogram_freq=1, write_graph=False, batch_size=batchSize, write_grads=True) tensorboardGenerator = AutoregressiveGenerativeModelDataGenerator( gm, testData[:10], corpus.fs, batchSize) x, y = tensorboardGenerator[0] tensorboard.validation_data = [ x, # X y, # y np.ones(len(x)), # sample weights ] callbacks.append(tensorboard) checkpointer = ModelCheckpoint(modelFilename, monitor='val_loss', save_best_only=True, save_weights_only=True) callbacks.append(checkpointer) callbacks.append( ReduceLROnPlateau(monitor='loss', factor=0.2, patience=5, min_lr=1e-5)) callbacks.append(TerminateOnNaN()) callbacks.append( EarlyStopping(monitor='val_loss', min_delta=1e-6, patience=50, mode='auto')) try: gm.fit_generator(trainGenerator, epochs=200, validation_data=testGenerator, use_multiprocessing=False, shuffle=True, verbose=1, callbacks=callbacks) except KeyboardInterrupt: logger.info("Training interrupted by the user") gm.save_weights(modelFilename) gm.save_weights(modelFilename) logger.info('All done.')
def main(): parser = ConfigParser(interpolation=ExtendedInterpolation()) parser.read("config.ini") params = parser["train"] backbone = params["backbone"] # use this environment flag to change which GPU to use # os.environ["CUDA_VISIBLE_DEVICES"] = "1" # set the modified tf session as backend in keras keras.backend.tensorflow_backend.set_session(get_session()) resume = bool(params["resume"]) # if resuming load saved model instead if resume: model = models.load_model(params["model_path"], backbone_name=backbone) else: model = models.backbone(backbone).retinanet( num_classes=int(params["num_classes"])) model.compile(loss={ 'regression': losses.smooth_l1(), 'classification': losses.focal() }, optimizer=keras.optimizers.adam(lr=1e-5, clipnorm=0.001), metrics=['accuracy']) weights = params["weights_path"] model.load_weights(weights, by_name=True, skip_mismatch=True) batch_size = int(params["batchsize"]) preprocesss = get_preprocessing(bool(params["preprocess"])) anno_path = "retinanet_annotations/{}".format(params["dataset"]) data = CSVGenerator(os.path.join(anno_path, "train_annotation.csv"), os.path.join(anno_path, "class_map.csv"), batch_size=batch_size, image_min_side=512, image_max_side=512, preprocess_image=preprocesss) val_data = CSVGenerator(os.path.join(anno_path, "val_annotation.csv"), os.path.join(anno_path, "class_map.csv"), batch_size=batch_size, image_min_side=512, image_max_side=512, preprocess_image=preprocesss) val_dataset_size = val_data.size() # TODO: Set the file path under which you want to save the model. current_time = datetime.now().strftime('%Y-%m-%d %H:%M').split(" ") model_checkpoint = ModelCheckpoint(filepath=os.path.join( params["check_point_path"], 'retinanet_{}_{}_{}_{}_{}.h5'.format(params["dataset"], backbone, params["preprocess"], current_time[0], current_time[1])), monitor='val_loss', verbose=1, save_best_only=True, save_weights_only=False, mode='auto', period=int(params["save_period"])) # model_checkpoint.best = csv_logger = CSVLogger(filename=os.path.join( params["csv_path"], 'retinanet_{}_{}_{}_{}_{}.csv'.format(params["dataset"], backbone, params["preprocess"], current_time[0], current_time[1])), separator=',', append=True) # learning_rate_scheduler = LearningRateScheduler(schedule=lr_schedule, verbose=1) terminate_on_nan = TerminateOnNaN() tensorboard = TensorBoard(log_dir=os.path.join(params["tensorboard_path"], 'retinanet', current_time[0], current_time[1]), write_images=True, write_graph=True) callbacks = [ model_checkpoint, csv_logger, # learning_rate_scheduler, terminate_on_nan, tensorboard, ] # Fit model # If you're resuming a previous training, set `initial_epoch` and `final_epoch` accordingly. if resume: initial_epoch = int(params["initial_epoch"]) final_epoch = int(params["final_epoch"]) else: initial_epoch = 0 final_epoch = int(params["epochs"]) steps_per_epoch = int(params["steps_per_epoch"]) model.fit_generator( generator=data, steps_per_epoch=steps_per_epoch, epochs=final_epoch, callbacks=callbacks, validation_data=val_data, validation_steps=ceil(val_dataset_size / batch_size), initial_epoch=initial_epoch, )
def PGNN_train_test(optimizer_name, optimizer_val, use_YPhy, pre_train, tr_size,
                    lamda, iteration, n_nodes, n_layers, drop_frac, reg, samp):
    # fix_seeds(ss)

    # Hyper-parameters of the training process
    # batch_size = tr_size
    batch_size = 1
    num_epochs = 300
    val_frac = 0.25
    patience_val = 80

    # Initializing results filename
    exp_name = "DNN_pre_loss_" + pre_train + optimizer_name + '_trsize' + \
        str(tr_size) + '_lamda' + str(lamda) + '_iter' + str(iteration)
    exp_name = exp_name.replace('.', 'pt')
    results_dir = '../results/'
    model_name = results_dir + exp_name + '_model.h5'  # storing the trained model
    if reg == True and samp == 25:
        results_name = results_dir + exp_name + '_results_25_regularizer.dat'  # storing the results of the model
    elif reg == False and samp == 25:
        results_name = results_dir + exp_name + '_results_25.dat'  # storing the results of the model
    elif reg == True and samp == 1519:
        results_name = results_dir + exp_name + '_results_1519_regularizer.dat'  # storing the results of the model
    elif reg == False and samp == 1519:
        results_name = results_dir + exp_name + '_results_1519.dat'  # storing the results of the model

    # Load labeled data
    data = np.loadtxt('../data/labeled_data.dat')
    x_labeled = data[:, :2]  # -2 because we do not need porosity predictions
    y_labeled = data[:, -2:-1]  # dimensionless bond length and porosity measurements

    if samp == 25:
        data = np.loadtxt('../data/unlabeled_data_BK_constw_v2_25.dat')
        x_unlabeled = data[:, :]
    elif samp == 1519:
        data = np.loadtxt('../data/unlabeled_data_BK_constw_v2_1525.dat')
        x_unlabeled = data[:, :]

    x_unlabeled1 = x_unlabeled[:1303, :]
    x_unlabeled2 = x_unlabeled[-6:, :]
    x_unlabeled = np.vstack((x_unlabeled1, x_unlabeled2))

    # initial porosity
    init_poro = x_unlabeled[:, -1]
    x_unlabeled = x_unlabeled[:, :2]

    # normalize dataset with MinMaxScaler
    scaler = preprocessing.MinMaxScaler(feature_range=(0.0, 1.0))
    x_labeled = scaler.fit_transform(x_labeled)
    x_unlabeled = scaler.fit_transform(x_unlabeled)
    # y_labeled = scaler.fit_transform(y_labeled)

    # # initial porosity & physics outputs are removed
    # x_unlabeled = x_unlabeled[:, :-3]

    # train and test data
    trainX, trainY = x_labeled[:tr_size, :], y_labeled[:tr_size]
    # testX, testY = x_labeled[tr_size:,:], y_labeled[tr_size:]
    testX, testY = x_labeled[tr_size:, :], y_labeled[tr_size:]

    if use_YPhy == 0:
        # Removing the last column from x_unlabeled (corresponding to Y_PHY)
        x_unlabeled = x_unlabeled[:, :-1]

    dependencies = {'root_mean_squared_error': root_mean_squared_error}

    # load the pre-trained model using non-calibrated physics-based model predictions (./data/unlabeled.dat)
    loaded_model = load_model(results_dir + pre_train, custom_objects=dependencies)

    # Creating the model
    model = Sequential()
    for layer in np.arange(n_layers):
        if layer == 0:
            model.add(Dense(n_nodes, activation='relu',
                            input_shape=(np.shape(trainX)[1],)))
        else:
            if reg:
                model.add(Dense(n_nodes, activation='relu',
                                kernel_regularizer=l1_l2(l1=.001, l2=.001)))
            else:
                model.add(Dense(n_nodes, activation='relu'))
        model.add(Dropout(rate=drop_frac))
        # model.add(MCDropout(rate=drop_frac))
    model.add(Dense(1, activation='linear'))

    # pass the weights to all layers but 1st input layer, whose dimensions are updated
    for new_layer, layer in zip(model.layers[1:], loaded_model.layers[1:]):
        new_layer.set_weights(layer.get_weights())

    # physics-based regularization
    uinp_sc = K.constant(value=x_unlabeled)  # unlabeled input data
    lam1 = K.constant(value=lamda[0])  # regularization hyper-parameter
    predictions = model(uinp_sc)  # model output at depth i
    phyloss2 = poros(init_poro, predictions)  # physics loss 1
    totloss = combined_loss([phyloss2, lam1])
    phyloss = phy_loss_mean([phyloss2, lam1])

    model.compile(loss=totloss,
                  optimizer=optimizer_val,
                  metrics=[phyloss, root_mean_squared_error])

    early_stopping = EarlyStopping(monitor='val_loss', patience=patience_val, verbose=1)
    # print('Running...' + optimizer_name)
    history = model.fit(trainX, trainY,
                        batch_size=batch_size,
                        epochs=num_epochs,
                        verbose=0,
                        validation_split=val_frac,
                        callbacks=[early_stopping, TerminateOnNaN()])

    test_score = model.evaluate(testX, testY, verbose=1)
    print(test_score)

    samples = []
    for i in range(int(nsim)):
        print("simulation num:", i)
        predictions = model.predict(Xx)
        samples.append(predictions)

    return np.array(samples)
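# Hypothetical usage of the function above (the argument variables and the
# module-level `nsim` and `Xx` are assumed to be defined by the calling
# script): the returned array stacks `nsim` forward passes, so the mean and
# standard deviation across axis 0 give a point prediction and an uncertainty
# estimate when a Monte-Carlo-style dropout layer is active at inference time.
samples = PGNN_train_test(optimizer_name, optimizer_val, use_YPhy, pre_train,
                          tr_size, lamda, iteration, n_nodes, n_layers,
                          drop_frac, reg, samp)
mc_mean = samples.mean(axis=0)
mc_std = samples.std(axis=0)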
def main(args): # ------------------- # Checks, for anything really bad that happens due to bugs # ------------------- assert ( args.tfpn is False, "ERROR: Currently, using TFPN makes networks not train. It is most likely due to do " "weights not updating, or some such. This needs fixing, eventually.") assert ( args.gpus == 1, "ERROR: Currently, using multiple GPUs will crash training when it comes time to compute the " "embeddings etc. This needs to be fixed.") # ------------------- # Check Parameters # ------------------- if not args.scratch_dir: raise ValueError("Scratch directory needs to be given") l12_reg = ast.literal_eval(args.l12_regularisation) # -------------------------------------- # Set up logging and scratch directories # -------------------------------------- os.makedirs(args.scratch_dir, exist_ok=True) model_dir = os.path.join(args.scratch_dir, 'models') os.makedirs(model_dir, exist_ok=True) log_dir = os.path.join(args.scratch_dir, 'logs') os.makedirs(log_dir, exist_ok=True) # --------------------------------------- # Check datasets exist # --------------------------------------- if not os.path.isfile(args.coco_path): raise ValueError("Training dataset %s does not exist" % args.coco_path) if args.val_coco_path and not os.path.isfile(args.val_coco_path): raise ValueError("Validation dataset %s does not exist" % args.val_coco_path) if not args.no_trains and not args.trains_project: raise ValueError( "If experiment tracking is on, the --trains-project needs to be given" ) elif not args.no_trains: warnings.warn("Experiment tracking is turned off!") task = None else: if args.trains_experiment: experiment_name = args.trains_experiment else: experiment_name = os.path.basename(args.scratch_dir) task = Task.init(args.trains_project, experiment_name) with open(os.path.join(args.scratch_dir, 'task_id.txt'), 'w') as task_id_file: task_id_file.write(task.id) print("TRAINS - Project %s - Experiment %s" % (args.trains_project, experiment_name)) # ------------------------------ # Read + initalise category map # ------------------------------ raw_cat_map = json.load( open(args.category_map, 'r') ) # Load the caption map - caption_map should live on place on servers cat_map = {} for k, v in raw_cat_map.items(): cat_map[int(k)] = v cat_translator = CategoryTranslator(mapping=cat_map) num_classes = len(set( cat_map.values())) # Get num classes from caption map image_shape = [int(x) for x in args.image_shape.split(',')] # ------------------------ # Initialise datasets # ------------------------ train_dataset = ImageClassificationDataset(args.coco_path, translator=cat_translator) train_gen = train_dataset.generator(endless=True, shuffle_ids=True) if args.val_coco_path: val_dataset = ImageClassificationDataset(args.val_coco_path, translator=cat_translator) val_gen = val_dataset.generator(endless=True, shuffle_ids=True) else: val_gen = None val_dataset = None # ----------------------------------- # Create Augmentation Configuration # ----------------------------------- if args.augmentation_configuration: aug_config = ast.literal_eval(args.augmentation_configuration) else: aug_config = { "some_of": None, # Do all (None=do all, 1=do one augmentation) "flip_lr": True, # Flip 50% of the time "flip_ud": True, # Flip 50% of the time "gblur": None, # No Gaussian Blur "avgblur": None, # No Average Blur "gnoise": (0, 0.05), # Add a bit of Gaussian noise "scale": (0.8, 1.2), # Don't scale "rotate": (-22.5, 22.5), # Don't rotate "bright": (0.9, 1.1), # Darken/Brighten (as ratio) "colour_shift": (0.95, 
1.05), # Colour shift (as ratio) "cval": -1 } augmentation_cfg = create_augmentation_configuration(**aug_config) # ------------------------- # Create data pipeline # ------------------------- def preprocess(image, caption): """ A preprocessing function to resize the image Args: image: (np.ndarray) The image caption: passedthrough Returns: image, caption """ image = resize(image, image_shape, preserve_range=True) return preprocess_input(image.astype(NN_DTYPE)), caption def pipeline( gen, num_classes, batch_size, do_data_aug=False, ): """ A sequence of generators that perform operations on the data Args: gen: the base generator (e.g. from dataset.generator()) num_classes: (int) the number of classes, to create a multihot vector batch_size: (int) the batch size, for the batching generator Returns: """ return (batching_gen(augmentation_gen(lambda_gen( multihot_gen(lambda_gen(gen, func=preprocess), num_classes=num_classes), func=enforce_one_vs_all), aug_config=augmentation_cfg, enable=do_data_aug), batch_size=batch_size)) def cache_pipeline(gen, num_classes): return (lambda_gen(multihot_gen(lambda_gen(gen, func=preprocess), num_classes=num_classes), func=enforce_one_vs_all)) # limit the process GPU usage. Without this, can get CUDNN_STATUS_INTERNAL_ERROR import tensorflow as tf from keras.backend.tensorflow_backend import set_session config = tf.ConfigProto() config.gpu_options.per_process_gpu_memory_fraction = args.gpu_fraction config.gpu_options.allow_growth = args.gpu_allow_growth set_session(tf.Session(config=config)) # ------------------------- # Initialise Optimizer # ------------------------- optimizer_args = ast.literal_eval(args.optimizer_args) optimizer_args[ 'lr'] = args.lr # The init_lr argument to classification.Task is what actually sets the optimizer learning rate # ------------------------- # Create Classifier Model # ------------------------- if args.resume_from_ckpt: classifier = classification.Task.load(args.resume_from_ckpt) classifier.set_lr(args.lr) # update learning rate elif args.load_params_json: classifier = classification.Task.from_json(args.load_params_json) else: classifier = classification.Task( backbone=args.backbone, output_activation=args.output_activation, pooling=args.pooling, classes=num_classes, input_shape=tuple(image_shape), init_weights=args.weights, init_epoch=0, init_lr=args.lr, trainable=False if args.only_train_head else True, # if only_train_head, then all layers are frozen. 
Head layer is set to trainable later optimizer=args.optimizer, optimizer_args=optimizer_args, loss=args.loss, metrics=['accuracy'], gpus=args.gpus, l12_reg=l12_reg) if args.only_train_head: classifier._maybe_create_model() classifier.set_trainable({'logits': True}) classifier.dump_args(os.path.join(args.scratch_dir, 'params.json')) # -------------------------------------- # Calculate number of steps (batches) # -------------------------------------- if args.training_steps: train_steps = args.training_steps else: train_steps = np.floor(len(train_dataset) / args.batch_size) if args.validation_steps: val_steps = args.validation_steps else: val_steps = int( np.floor(len(val_dataset) / args.batch_size)) if val_dataset is not None else None # ------------------------------ # Configure the validation data # ------------------------------ # Set the validation pipeline - shouldn't have image augmentation val_pipeline = pipeline(val_gen, num_classes=num_classes, batch_size=args.batch_size, do_data_aug=False) if val_gen else None # If the if args.cache_val and val_gen: print("CACHING VAL", file=sys.stderr) val_data = gen_dump_data(gen=cache_pipeline(val_gen, num_classes), num_images=val_steps, verbose=True) tfpn = args.tfpn histogram_freq = args.histogram_freq pr_curves = args.pr_curves else: val_data = val_pipeline if args.tfpn: warnings.warn( "TFPN doesn't work properly unless val is cached. Used --cache-val to cache val. Setting to False" ) if args.histogram_freq > 0: warnings.warn( "Histograms don't work unless val is cached. Used --cache-val to cache val. Setting to 0" ) if args.pr_curves: warnings.warn( "PR Curves don't work properly unless val is cached. Used --cache-val to cache val. Setting to False" ) tfpn = False histogram_freq = 0 pr_curves = False # ------------------ # Callbacks # ------------------ if args.embeddings: print("CACHING EMBEDDING", file=sys.stderr) from abyss_deep_learning.keras.tensorboard import produce_embeddings_tsv assert args.gpus == 1, 'Due to a bug, if calculating embeddings, only 1 gpu can be used' embeddings_data = gen_dump_data( gen=cache_pipeline(val_gen, num_classes), num_images=int(np.floor(len(val_dataset) / args.batch_size)), verbose=True) produce_embeddings_tsv(os.path.join(log_dir, 'metadata.tsv'), headers=[str(i) for i in range(num_classes)], labels=embeddings_data[1]) embeddings_freq = args.embeddings_freq else: embeddings_data = [None, None] embeddings_freq = 0 try: from abyss_deep_learning.keras.tensorboard import ImprovedTensorBoard improved_tensorboard = ImprovedTensorBoard( log_dir=log_dir, batch_size=args.batch_size, write_graph=True, embeddings_freq=embeddings_freq, embeddings_metadata=os.path.join(log_dir, 'metadata.tsv'), embeddings_data=embeddings_data[0], embeddings_layer_names=['global_average_pooling2d_1'], num_classes=num_classes, val_generator=pipeline( val_gen, num_classes=num_classes, batch_size=args.batch_size) if (val_gen and not args.cache_val) else None, val_steps=val_steps, tfpn=tfpn, pr_curve=pr_curves, histogram_freq=histogram_freq) except: improved_tensorboard = None warnings.warn("failed to import tensorboard; running without it") if improved_tensorboard is None: callbacks = [ SaveModelCallback(classifier.save, model_dir, save_interval=args.save_model_interval ) # A callback to save the model , ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=25, min_lr=1e-8), TerminateOnNaN() ] else: callbacks = [ SaveModelCallback(classifier.save, model_dir, save_interval=args.save_model_interval ), # A callback to save the model 
improved_tensorboard, #ImprovedTensorBoard(log_dir=log_dir, histogram_freq=3, batch_size=args.batch_size, write_graph=True, write_grads=True, num_classes=num_classes, pr_curve=False, val_generator=pipeline(val_gen, num_classes=num_classes, batch_size=1) if (val_gen and not args.cache_val) else None, val_steps=val_steps), ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=25, min_lr=1e-8), TerminateOnNaN() ] if args.early_stopping_patience: callbacks.append( EarlyStopping(monitor='val_loss', min_delta=1e-4, patience=args.early_stopping_patience, verbose=1, mode='auto', baseline=None, restore_best_weights=True)) if task: callbacks.append(TrainsCallback(logger=task.get_logger())) if args.lr_schedule: lr_schedule_params = ast.literal_eval( args.lr_schedule_params ) if args.lr_schedule_params else None # Load the lr schedule params lr_schedule_callback = create_lr_schedule_callback( args.lr_schedule, args.lr, lr_schedule_params) if lr_schedule_callback: callbacks.append(lr_schedule_callback) # ---------------------------- # Train # ---------------------------- if args.class_weights == 1 or args.class_weights == "1": args.class_weights = train_dataset.class_weights elif args.class_weights: args.class_weights = args.class_weights.split(",") args.class_weights = { i: float(args.class_weights[i]) for i in range(0, len(args.class_weights)) } # convert list to class_weight dict. print("Using class weights: ", args.class_weights) classifier.fit_generator(generator=pipeline(train_gen, num_classes=num_classes, batch_size=args.batch_size, do_data_aug=True), steps_per_epoch=train_steps, validation_data=val_data, validation_steps=val_steps, epochs=args.epochs, verbose=1, shuffle=True, callbacks=callbacks, use_multiprocessing=True, workers=args.workers, class_weight=args.class_weights)
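# The next snippet passes a project-specific `PredictImage` callback that is
# not defined in this excerpt; a minimal sketch of what such a callback might
# look like (the constructor arguments mirror the call below, the saving logic
# is an assumption):
import os
import numpy as np
import tensorflow as tf

class PredictImage(tf.keras.callbacks.Callback):
    def __init__(self, samples, fov, out_dir):
        super(PredictImage, self).__init__()
        self.samples = samples
        self.fov = fov
        self.out_dir = out_dir

    def on_epoch_end(self, epoch, logs=None):
        # The model below has two outputs (mask, pixels), so predict() returns a list.
        mask_pred, pixel_pred = self.model.predict(self.samples)
        np.save(os.path.join(self.out_dir, 'mask_epoch{:03d}.npy'.format(epoch)), mask_pred)
        np.save(os.path.join(self.out_dir, 'pixels_epoch{:03d}.npy'.format(epoch)), pixel_pred)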
batch_size = 1
# nb_epoch = 50
nb_epoch = 38

log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
im_dir = "logs/images/"
csv_path = "logs/save/" + 'log_' + datetime.datetime.now().strftime("%Y%m%d-%H%M%S") + '.csv'

mcp_save = ModelCheckpoint('weight_lung', save_best_only=True,
                           monitor='val_loss', mode='min', save_weights_only=True)
reduce_lr_loss = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=7,
                                   verbose=1, epsilon=1e-4, mode='min')
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=0)
p_i_callback = PredictImage(te_data2[0:2], fov01, im_dir)
ton_callback = TerminateOnNaN()

if not os.path.isfile(csv_path):
    open(csv_path, 'a').close()
csv_log = CSVLogger(csv_path)

callbacks_list = [mcp_save, reduce_lr_loss, tensorboard_callback, p_i_callback,
                  csv_log, ton_callback]
# callbacks_list = [mcp_save, reduce_lr_loss, tensorboard_callback, csv_log, ton_callback]

history = model.fit(tr_data, [tr_mask, tr_pixels],
                    batch_size=batch_size,
                    epochs=nb_epoch,
                    shuffle=True,
def train_model_generator(kmodel, loss_fn): kmodel.compile(loss=loss_fn, optimizer=keras.optimizers.Adam( lr=learning_rate, clipvalue=0.5, clipnorm=1.0), metrics=['accuracy']) t = now() # model.fit(x_train, y_train, # batch_size=batch_size, # epochs=epochs, # verbose=1, # validation_data=(x_test, y_test)) # check input lengths # if (len(train_files) != len(validation_files) or len(train_files) != len(validation_files) or len(validation_files) != len(eval_files)): # raise ValueError("Input file lengths do not match") # Read feature files train_features = gn.readFile(train_files[0]) valid_features = gn.readFile(validation_files[0]) eval_features = gn.readFile(eval_files[0]) # Read labels train_labels = gn.readFile(train_files[1]) valid_labels = gn.readFile(validation_files[1]) eval_labels = gn.readFile(eval_files[1]) train_steps_gen, train_input_size, train_generator = gn.generator_survival( train_features, train_labels, shuffle=True, batch_size=train_batch_size) valid_steps_gen, valid_input_size, val_generator = gn.generator_survival( valid_features, valid_labels, shuffle=True, batch_size=train_batch_size) # for ci _, _, eval_ci_generator = gn.generator_simple( eval_features, eval_labels, batch_size=train_batch_size) _, _, training_ci_generator = gn.generator_simple( train_features, train_labels, batch_size=train_batch_size) # Callbacks tblog = TensorBoard( log_dir=os.path.join(job_dir, 'logs'), histogram_freq=0, write_graph=True, write_images=False, embeddings_freq=0 ) tonan = TerminateOnNaN() concordance_index_eval = ContinuousEval( kmodel, job_dir, eval_ci, eval_frequency, eval_ci_generator, training_ci_generator) kmodel.fit_generator( generator=train_generator, steps_per_epoch=train_steps, epochs=num_epochs, validation_data=val_generator, validation_steps=10, verbose=1, # for tensorboard visualization callbacks=[tblog, tonan, concordance_index_eval]) print('Training time: %s' % (now() - t))
def PGNN_train_test(optimizer_name, optimizer_val, drop_rate, iteration,
                    n_layers, n_nodes, tr_size, lamda, reg):
    # Hyper-parameters of the training process
    # batch_size = int(tr_size/2)
    batch_size = 1
    num_epochs = 300
    val_frac = 0.25
    patience_val = 80

    # Initializing results filename
    exp_name = "FeatureEng_" + optimizer_name + '_drop' + str(drop_rate) + \
        '_nL' + str(n_layers) + '_nN' + str(n_nodes) + \
        '_trsize' + str(tr_size) + '_iter' + str(iteration)
    exp_name = exp_name.replace('.', 'pt')
    results_dir = '../results/'
    model_name = results_dir + exp_name + '_NoPhyInfomodel.h5'  # storing the trained model
    if reg:
        results_name = results_dir + exp_name + '_results_regularizer.dat'  # storing the results of the model
    else:
        results_name = results_dir + exp_name + '_results.dat'  # storing the results of the model

    # Load labeled data
    data = np.loadtxt('../data/labeled_data.dat')
    # x_labeled = data[:, :-5]  # -2 because we do not need porosity predictions
    x_label = data[:, :-3]  # -2 because we do not need porosity predictions
    x_labeled = np.hstack((x_label[:, :2], x_label[:, -2:]))
    y_labeled = data[:, -3:-1]

    # normalize dataset with MinMaxScaler
    scaler = preprocessing.MinMaxScaler(feature_range=(0, 1.0))
    # scaler = preprocessing.StandardScaler()
    x_labeled = scaler.fit_transform(x_labeled)
    # y_labeled = scaler.fit_transform(y_labeled)

    # train and test data
    trainX, trainY = x_labeled[:tr_size, :], y_labeled[:tr_size]
    # testX, testY = x_labeled[tr_size:,:], y_labeled[tr_size:]
    # init_poro = data[tr_size:, -1]
    testX, testY = x_labeled[tr_size:, :], y_labeled[tr_size:]
    init_poro = data[tr_size:, -1]

    # Creating the model
    model = Sequential()
    for layer in np.arange(n_layers):
        if layer == 0:
            model.add(Dense(n_nodes, activation='relu',
                            input_shape=(np.shape(trainX)[1],)))
        else:
            if reg:
                model.add(Dense(n_nodes, activation='relu',
                                kernel_regularizer=l1_l2(l1=.001, l2=.001)))
            else:
                model.add(Dense(n_nodes, activation='relu'))
        # model.add(Dropout(rate=drop_rate))
        model.add(MCDropout(rate=drop_rate))
    model.add(Dense(2, activation='linear'))

    model.compile(loss='mean_squared_error',
                  optimizer=optimizer_val,
                  metrics=[root_mean_squared_error])

    early_stopping = EarlyStopping(monitor='val_loss', patience=patience_val, verbose=1)
    print('Running...' + optimizer_name)
    history = model.fit(trainX, trainY,
                        batch_size=batch_size,
                        epochs=num_epochs,
                        verbose=0,
                        validation_split=val_frac,
                        callbacks=[early_stopping, TerminateOnNaN()])

    test_score = model.evaluate(testX, testY, verbose=1)
    print(test_score)

    # predictions = model.predict(testX)
    # # inv_pred = scaler.inverse_transform(predictions)
    # phyloss1 = bond(predictions[:,0])  # physics loss 1
    # # init_poro_ndim = np.ones((init_poro.shape))
    # # diff2 = poros(init_poro_ndim, predictions[:,1])  # physics loss 2
    # phyloss2 = poros(init_poro, predictions[:,1])  # physics loss 2
    # phyloss3 = strength1(predictions[:,0], predictions[:,1])
    # phyloss4 = strength2(predictions[:,0], predictions[:,1])
    # lam1, lam2 = lamda[0], lamda[1]
    # phyloss = phy_loss_mean([phyloss1, phyloss2, phyloss3, phyloss4, lam1, lam2])
    # print('iter: ' + str(iteration) +
    #       ' nL: ' + str(n_layers) + ' nN: ' + str(n_nodes) +
    #       ' trsize: ' + str(tr_size) +
    #       ' TestRMSE: ' + str(test_score[1]) + ' PhyLoss: ' + str(phyloss), "\n")
    # # model.save(model_name)
    # # save results
    # results = {'train_rmse': history.history['root_mean_squared_error'],
    #            'val_rmse': history.history['val_root_mean_squared_error'],
    #            'test_rmse': test_score[1], 'PhyLoss': phyloss}
    # save_obj(results, results_name)
    # return results, results_name, predictions, testY, test_score[1]
    # predictions = model.predict(Xx)

    samples = []
    for i in range(int(nsim)):
        print("simulation num:", i)
        predictions = model.predict(Xx)
        predictions = predictions[:, 1]
        samples.append(predictions)

    return np.array(samples)
def train(self, dataset_dir, tag, custom_callbacks=None):
    '''Train the model.

    train_dataset, val_dataset: Training and validation Dataset objects.
    learning_rate: The learning rate to train with.
    epochs: Number of training epochs. Note that previous training epochs are
        considered to be done already, so this actually determines the epochs
        to train in total rather than in this particular call.
    layers: Allows selecting which layers to train. It can be:
        - A regular expression to match layer names to train
        - One of these predefined values:
          heads: The RPN, classifier and mask heads of the network
          all: All the layers
          3+: Train ResNet stage 3 and up
          4+: Train ResNet stage 4 and up
          5+: Train ResNet stage 5 and up
    custom_callbacks: Optional. Add custom callbacks to be called with the
        keras fit_generator method. Must be a list of type keras.callbacks.
    '''
    self.branched_train_config.save(
        os.path.join(self.log_dir, 'train_stage{}.cfg'.format(self.stage + 1)))
    if self.stage == 0:
        self.branched_model_config.save(
            os.path.join(self.log_dir, '..', 'model.cfg'))
        infer_config = self.make_inference_config()
        infer_config.save(os.path.join(self.log_dir, '..', 'infer.cfg'))
        summary_fp = open(
            os.path.join(self.log_dir, '..', 'model_summary.log'), 'w')
        self.keras_model.summary(print_fn=lambda x: summary_fp.write(x + '\n'))
        summary_fp.close()

    assert self.log_dir is not None, 'You should call Trainer::set_log_dir() before start training'
    print('Logging directory:', self.log_dir)

    if self.train_config.TRAINABLE_LAYERS == 'all':
        layer_regex = r'.*'
        train_backbone = True
    else:
        layer_regex = '|'.join([
            r'{}.*'.format(x) for x in self.train_config.TRAINABLE_LAYERS
            if x != 'backbone'
        ])
        train_backbone = 'backbone' in self.train_config.TRAINABLE_LAYERS

    # Data generators
    train_generator = self.get_data_generator(dataset_dir, 'train', tag)
    val_generator = self.get_data_generator(dataset_dir, 'val', tag)
    if hasattr(train_generator, 'class_names') and hasattr(val_generator, 'class_names'):
        assert train_generator.class_names == val_generator.class_names

    # Callbacks
    callbacks = [
        TensorBoard(log_dir=self.log_dir,
                    histogram_freq=0,
                    write_graph=(self.stage == 0)),
        ModelCheckpoint(self.checkpoint_path, verbose=0, save_weights_only=True),
        TerminateOnNaN(),
        CSVLogger(os.path.join(self.log_dir,
                               'loss_stage{}.csv'.format(self.stage + 1)),
                  separator=',',
                  append=True)
    ]

    # Add custom callbacks to the list
    if custom_callbacks:
        callbacks += custom_callbacks

    learning_rate = self.train_config.LEARNING_RATE
    epochs = self.train_config.EPOCHS

    # Train
    print('\nStarting at epoch {}. LR={}\n'.format(self.epoch, learning_rate))
    print('Checkpoint Path: {}'.format(self.checkpoint_path))
    self.set_trainable(layer_regex,
                       train_backbone=train_backbone,
                       train_bn=self.train_config.TRAIN_BN)
    self.compile()

    # Work-around for Windows: Keras fails on Windows when using
    # multiprocessing workers. See discussion here:
    # https://github.com/matterport/Mask_RCNN/issues/13#issuecomment-353124009
    if os.name == 'nt':
        workers = 0
    else:
        workers = multiprocessing.cpu_count()

    print('===================Training Stage {}==================='.format(self.stage + 1))
    self.branched_train_config.display()
    self.keras_model.fit_generator(
        train_generator,
        initial_epoch=self.epoch,
        epochs=epochs,
        steps_per_epoch=self.train_config.STEPS_PER_EPOCH,
        callbacks=callbacks,
        validation_data=val_generator,
        validation_steps=self.train_config.VALIDATION_STEPS,
        max_queue_size=100,
        workers=workers,
        use_multiprocessing=True,
    )
    print('===================Stage {} Finished===================\n\n'.format(self.stage + 1))
# Local optimizer
optimizer = Adam(lr=ae_cfg['learning_rate'], amsgrad=True)

# Compile with custom weighted loss function
ae.compile(optimizer=optimizer,
           loss=sample_balanced_wmse(eps=ae_cfg['global_eps'], weights=loss_weights))

# Load last round weights and optimizer state
if round_idx > 0:
    ae._make_train_function()
    ae.optimizer.set_weights(weight_values)
    ae.load_weights(global_dir + '/tmp_weights_seed%d.h5' % local_seed)

# Create list of callbacks
callbacks = [TerminateOnNaN()]

# Train
history = ae.fit(x=llr_train, y=llr_train,
                 batch_size=ae_cfg['batch_size'],
                 epochs=ae_cfg['num_epochs_1'],
                 validation_data=(llr_val, llr_val),
                 verbose=2,
                 callbacks=callbacks)

# Write incrementally
hdf5storage.savemat(local_dir + '/results.mat',
                    {'val_loss': history.history['val_loss']},
                    truncate_existing=True)

# Evaluate on validation data
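# The evaluation announced by the comment above is not shown in this excerpt;
# a plausible continuation, mirroring the fit call, would be:
val_loss = ae.evaluate(x=llr_val, y=llr_val,
                       batch_size=ae_cfg['batch_size'],
                       verbose=0)

# For completeness: `weight_values`, used when restoring the optimizer state,
# is typically captured at the end of the previous round with
#     weight_values = ae.optimizer.get_weights()
# before the temporary weights file is written (a sketch; the surrounding
# round loop is not part of this snippet).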
# configure callbacks
lrate = LearningRateScheduler(step_decay)
checkpoint = ModelCheckpoint(WEIGHT_DIR + '/' + WEIGHTS_SAVE,
                             monitor='loss',
                             verbose=0,
                             save_best_only=False,
                             save_weights_only=True,
                             mode='min',
                             period=1)
csv_logger = CSVLogger(TRAINING_LOG, append=True)
tb = TensorBoard(log_dir=LOGS_DIR,
                 histogram_freq=0,
                 write_graph=True,
                 write_images=False)
tnan = TerminateOnNaN()
callbacks_list = [lrate, checkpoint, csv_logger, tb, tnan]

# sgd optimizer with learning_rate multipliers
multisgd = MultiSGD(learning_rate=base_lr, momentum=momentum, decay=0.0,
                    nesterov=False, lr_mult=lr_mult)

# start training
if use_multiple_gpus is not None:
    from keras.utils import multi_gpu_model
    model = multi_gpu_model(model, gpus=use_multiple_gpus)
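# `step_decay` is referenced by the LearningRateScheduler above but not defined
# in this snippet; a common form (the drop factor and step size here are
# illustrative, and `base_lr` is assumed to be defined, as it is used above) is:
import math

def step_decay(epoch):
    drop = 0.333
    epochs_drop = 17.0
    return base_lr * math.pow(drop, math.floor(epoch / epochs_drop))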
box_size = sys.argv[1]
maps = sys.argv[2]
ratio = sys.argv[3]
print("box size", sys.argv[1])
print("map size", sys.argv[2])
print("ratio size", sys.argv[3])
print("input argument is working")

############# global variables ##############################################################################
start_time = time.time()
# limit=100
epochs = 300  # how many times we go through each sample in one iteration before we go to the next sample

stop_here_please = EarlyStopping(monitor='val_loss', patience=10)
stop_immediately = TerminateOnNaN()
csv_logger = CSVLogger("logs/training_box_" + str(box_size) + "_maps_" +
                       str(maps) + "_ratio_" + str(ratio) + ".log")
save_best_model = ModelCheckpoint('models/best/my_map_model_box_' + str(box_size) +
                                  '_maps_' + str(maps) + '_ratio_' + str(ratio) + '_best.h5',
                                  monitor='val_loss',
                                  verbose=0,
                                  save_best_only=True,
                                  save_weights_only=False,
                                  mode='auto',
                                  period=1)

############# the data preparation ###########################################################################
# loading training data
x_train = np.load('train_data_binary/train_data_box_' + str(box_size) + '_' +
def fit(self, x, y, validation_x=None, validation_y=None, epochs=100, patience=0, verbose=None, min_delta=0, tensorboard=False, timeline=False, **keras_kwargs): if isinstance(x, pandas.DataFrame): x = x.to_dict(orient='series') if isinstance(validation_x, pandas.DataFrame): validation_x = validation_x.to_dict(orient='series') if not self.keras or not self.optimizer: self.build() with self.session.as_default(): if timeline: run_metadata = tensorflow.RunMetadata() options = tensorflow.RunOptions( trace_level=tensorflow.RunOptions.FULL_TRACE) else: run_metadata = None options = None self.keras.compile(loss=self.loss, optimizer=self.optimizer, options=options, run_metadata=run_metadata) if verbose is None: verbose = 1 if lore.env.name == lore.env.DEVELOPMENT else 0 logger.info('\n'.join([ '\n\n\n Fitting', '==============================', '| batch | learning | |', '| size | rate | decay |', '------------------------------', '| %5i | %8.6f | %7.5f |' % ( self.batch_size, self.learning_rate, self.decay, ), '==============================\n\n' ])) reload_best = ReloadBest( filepath=self.model.checkpoint_path(), monitor=self.monitor, mode='auto', ) callbacks = self.callbacks() callbacks += [ reload_best, TerminateOnNaN(), EarlyStopping( monitor=self.monitor, min_delta=min_delta, patience=patience, verbose=verbose, mode='auto', ), ] if tensorboard: callbacks += [ TensorBoard(log_dir=self.model.tensorboard_path(), histogram_freq=1, batch_size=self.batch_size, write_graph=True, write_grads=True, write_images=True, embeddings_freq=1, embeddings_metadata=None) ] try: with self.session.as_default(): self.history = self.keras_fit( x=x, y=[y] * self.towers, validation_data=Observations(x=validation_x, y=[validation_y] * self.towers), batch_size=self.batch_size, epochs=epochs, verbose=verbose, callbacks=callbacks, **keras_kwargs).history except KeyboardInterrupt: logger.warning( 'Caught SIGINT. Training aborted, and its history lost.') return {'loss': []} if timeline: with open(self.model.timeline_path(), 'w') as f: f.write( Timeline(step_stats=run_metadata.step_stats). generate_chrome_trace_format()) return { 'epochs': len(self.history['loss']), 'train': reload_best.train_loss, 'validate': reload_best.validate_loss, }
merged = concatenate([hidden_states_0, hidden_states_1, hidden_states_2])
merged = Dense(num_dense, activation='relu')(merged)
merged = Dropout(rate_drop_dense)(merged)
predicted = Dense(1, activation='sigmoid')(merged)

model = Model(inputs=sequences_input, outputs=predicted)
adam = optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999)
model.compile(loss='binary_crossentropy', optimizer=adam, metrics=['acc'])

STAMP = 'multi'
from keras.callbacks import EarlyStopping, ModelCheckpoint

early_stopping = EarlyStopping(monitor='val_loss', patience=3)
best_model_path = path + 'ulti/' + STAMP + '.h5'
model.load_weights(best_model_path)
model_checkpoint = ModelCheckpoint(best_model_path, save_best_only=True,
                                   save_weights_only=True)

class_weight = {0: 1, 1: 1}
history = model.fit(padded_train, y_train,
                    validation_data=(padded_test, y_test),
                    epochs=5, batch_size=128, shuffle=True,
                    class_weight=class_weight,
                    callbacks=[early_stopping, model_checkpoint, TerminateOnNaN()])

with open(path + 'ulti/history_' + STAMP + '_1.p', 'wb') as f:
    pickle.dump(history.history, f)
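# Since ModelCheckpoint above saves only the best weights (by val_loss), a
# small follow-up sketch for evaluating that checkpoint after training; not
# part of the original snippet, but it reuses the same names.
model.load_weights(best_model_path)
score = model.evaluate(padded_test, y_test, batch_size=128, verbose=0)
print('best-checkpoint test loss / acc:', score)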
def train_in_memory(model_name, x, y, epochs, verbosity, batch_size, learning_rate, tensorboard, checkpoint, validation_split): seed = 7 np.random.seed(seed) # create model object model_obj = get_model(model_name) if verbosity > 0: print('Creating {} model...'.format(model_obj.name)) if verbosity > 0: print('Compiling model...') model = model_obj.model_for_training(x.shape[1:], dice_coef_loss, Adam(lr=1e-5)) # callbacks if verbosity > 0: print('Creating callbacks...') callbacks = [] # termintate training on NAN callbacks.append(TerminateOnNaN()) # learning rate scheduler # callbacks.append(LearningRateScheduler(lr_scheduler)) if tensorboard: log_dir = join('out', 'logs', 'tensorboard') empty_dir(log_dir) callbacks.append( TensorBoard(log_dir=log_dir, histogram_freq=1, write_graph=False, write_grads=False, write_images=False)) if checkpoint: checkpoint_path = join('out', 'checkpoints') create_dir(checkpoint_path) empty_dir(checkpoint_path) checkpointer = ModelCheckpoint(join(checkpoint_path, 'checkpoint.{epoch:02d}.hdf5'), verbose=0) callbacks.append(checkpointer) if verbosity > 0: print('Checkpoints will be saved to {}'.format(checkpoint_path)) if verbosity > 0: print('Starting training...') if validation_split: x_train, x_validation, y_train, y_validation = train_test_split( x, y, test_size=validation_split, random_state=seed) validation_data = (x_validation, y_validation) else: if verbosity > 0: print('No validation split') x_train = x y_train = y validation_data = None if verbosity > 0: print('Shape before oversampling:') print('x: {}'.format(x_train.shape)) print('y: {}'.format(y_train.shape)) # oversample patches of medium density indices = np.where(y_train <= 20) indices = np.where(y_train[indices] > 5) y_train = np.concatenate((y_train, y_train[indices], y_train[indices])) x_train = np.concatenate((x_train, x_train[indices], x_train[indices])) indices = np.where(y_train[indices] > 20) y_train = np.concatenate((y_train, y_train[indices])) x_train = np.concatenate((x_train, x_train[indices])) if verbosity > 0: print('Shape after oversampling:') print('x: {}'.format(x_train.shape)) print('y: {}'.format(y_train.shape)) if verbosity > 1: model.summary() model.fit(x_train, y_train, validation_data=validation_data, batch_size=batch_size, epochs=epochs, verbose=verbosity, shuffle=True, callbacks=callbacks) # save model after training create_dir('out') out_path = join( 'out', '{}_{}.h5'.format(model_obj.name, datetime.now().strftime("%Y-%m-%d-%H-%M-%S"))) model.save(out_path) if verbosity > 0: print('model has been saved to {}'.format(out_path))
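# Note on the oversampling block above: the second np.where() is applied to an
# already-filtered subset, so the returned positions refer to that subset, not
# to the full arrays they are then used to index. A boolean mask over the full
# arrays expresses the intended "oversample medium-density patches" step more
# directly; the thresholds (5 and 20) are taken from the snippet, and y_train
# is assumed to hold one scalar count per patch. Illustrative sketch only.
medium = ((y_train > 5) & (y_train <= 20)).ravel()
y_train = np.concatenate((y_train, y_train[medium], y_train[medium]))
x_train = np.concatenate((x_train, x_train[medium], x_train[medium]))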
def fit(self, X, treatment, y, p=None):
    """
    Fits the DragonNet model.

    Args:
        X (np.matrix or np.array or pd.DataFrame): a feature matrix
        treatment (np.array or pd.Series): a treatment vector
        y (np.array or pd.Series): an outcome vector
    """
    X, treatment, y = convert_pd_to_np(X, treatment, y)

    y = np.hstack((y.reshape(-1, 1), treatment.reshape(-1, 1)))

    self.dragonnet = self.make_dragonnet(X.shape[1])

    metrics = [
        regression_loss, binary_classification_loss, treatment_accuracy,
        track_epsilon
    ]

    if self.targeted_reg:
        loss = make_tarreg_loss(ratio=self.ratio, dragonnet_loss=self.loss_func)
    else:
        loss = self.loss_func

    self.dragonnet.compile(optimizer=Adam(lr=self.learning_rate),
                           loss=loss, metrics=metrics)

    adam_callbacks = [
        TerminateOnNaN(),
        EarlyStopping(monitor='val_loss', patience=2, min_delta=0.),
        ReduceLROnPlateau(monitor='loss', factor=0.5, patience=5,
                          verbose=self.verbose, mode='auto', min_delta=1e-8,
                          cooldown=0, min_lr=0)
    ]
    self.dragonnet.fit(X, y,
                       callbacks=adam_callbacks,
                       validation_split=self.val_split,
                       epochs=self.epochs,
                       batch_size=self.batch_size,
                       verbose=self.verbose)

    sgd_callbacks = [
        TerminateOnNaN(),
        EarlyStopping(monitor='val_loss', patience=40, min_delta=0.),
        ReduceLROnPlateau(monitor='loss', factor=0.5, patience=5,
                          verbose=self.verbose, mode='auto', min_delta=0.,
                          cooldown=0, min_lr=0)
    ]
    sgd_lr = 1e-5
    momentum = 0.9
    self.dragonnet.compile(optimizer=SGD(lr=sgd_lr, momentum=momentum, nesterov=True),
                           loss=loss, metrics=metrics)
    self.dragonnet.fit(X, y,
                       callbacks=sgd_callbacks,
                       validation_split=self.val_split,
                       epochs=300,
                       batch_size=self.batch_size,
                       verbose=self.verbose)
from pak.evaluation import one_hot_classification as ohc
import numpy as np
import sys
sys.path.insert(0, "../")
from cabbage.data import ReId
from cabbage.data.MOT16Sampler import MOT16Sampler

root = Settings['data_root']
model_root = join(root, 'good_models')
#filepath = join(model_root, 'stacknet64x64_84acc.h5')
filepath = join(root, 'stacknet_64x64_model.h5')

checkpoint = ModelCheckpoint(filepath, monitor='loss', verbose=1,
                             save_best_only=True, mode='min')
callbacks_list = [checkpoint, TerminateOnNaN()]

if isfile(filepath):
    model = load_model(filepath)
else:
    raise Exception("Could not find model!")

model.summary()

#sampler = ReId.DataSampler(root,64,64)
sampler = MOT16Sampler(root, (64, 64))

X, Y = sampler.get_named_batch('MOT16-02', 1000, 4000)
X = preprocess_input(X.astype('float64'))
Y_ = model.predict(X)
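# callbacks_list above is built but not used in the visible part of the
# snippet. If the loaded model were fine-tuned on the sampled batch, the list
# would typically be passed to fit() as sketched here; the epoch count and
# batch size are illustrative assumptions, not the original settings.
model.fit(X, Y, epochs=10, batch_size=32, callbacks=callbacks_list)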
def train_model(self, x_train, y_train, x_val=None, y_val=None,
                batch_size=None, epochs=50, use_callback=False,
                best_model_temp=None, verbose=0):
    if not self.is_fitted:
        self.model.compile(loss=self.loss, optimizer=self.optimizer)
    if verbose:
        print(self.model.summary())

    if batch_size is None:
        if self.train_batch_size is None:
            # decide the training batch size adaptively from the data set size
            self.train_batch_size = int(
                np.max([2**(np.ceil(np.log2(len(x_train) / 1e4)) + 3), 64]))
    else:
        self.train_batch_size = batch_size

    val_data = None
    if x_val is not None and y_val is not None:
        val_data = (x_val, y_val)

    callbacks = [TerminateOnNaN()]
    if use_callback:
        if val_data is not None:
            callbacks.append(
                EarlyStopping(monitor='val_loss', min_delta=0, patience=15))
            callbacks.append(
                ReduceLROnPlateau(monitor='val_loss', factor=0.33, patience=4,
                                  verbose=verbose, min_delta=0,
                                  min_lr=self.learning_rate / 300))
            if best_model_temp is not None:
                self.store_model(best_model_temp)
                callbacks.append(
                    ModelCheckpoint(best_model_temp + '.h5', monitor='val_loss',
                                    save_best_only=True, save_weights_only=True,
                                    mode='min'))
        else:
            callbacks.append(
                EarlyStopping(monitor='loss', min_delta=1e-4, patience=5))
            callbacks.append(
                ReduceLROnPlateau(monitor='loss', factor=0.5, patience=3,
                                  verbose=verbose, min_delta=0.001,
                                  min_lr=self.learning_rate / 20))

    self.history = self.model.fit(x_train, y_train,
                                  batch_size=self.train_batch_size,
                                  epochs=epochs,
                                  validation_data=val_data,
                                  callbacks=callbacks,
                                  verbose=verbose)

    # TODO: can Spark cache intermediate model files so a use_best option is possible?
    if best_model_temp is not None:
        self.load_model(best_model_temp)
    self.is_fitted = True
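# The adaptive batch-size rule above evaluates roughly as follows (a small
# illustration, not part of the original code):
#   len(x_train) =   10000 -> 2**(ceil(log2(1))   + 3) =    8 -> max(8, 64)    =   64
#   len(x_train) =  100000 -> 2**(ceil(log2(10))  + 3) =  128 -> max(128, 64)  =  128
#   len(x_train) = 1000000 -> 2**(ceil(log2(100)) + 3) = 1024 -> max(1024, 64) = 1024
for n in (10000, 100000, 1000000):
    print(n, int(np.max([2**(np.ceil(np.log2(n / 1e4)) + 3), 64])))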
                         y=[x_train, x_train],
                         validation_data=(x_val, [x_val, x_val]),
                         shuffle=True,
                         epochs=200,
                         batch_size=400,
                         callbacks=[
                             EarlyStopping(monitor='val_loss', patience=10,
                                           verbose=1, min_delta=0.05),
                             ReduceLROnPlateau(monitor='val_loss', factor=0.1,
                                               patience=6, epsilon=0.1, verbose=1),
                             TerminateOnNaN()
                         ])

# In[ ]:

if do_training:
    f = plt.figure(figsize=(16, 8))
    style = {'loss': '--', 'outputs_par_PDFs_loss': 'o', 'KL_Loss_loss': '--'}
    for item in style.keys():
        plt.plot(np.array(fit_report.history[item][:])[1:], style[item],
                 label=item)
        plt.plot(np.array(fit_report.history['val_' + item][:])[1:],
                 style[item], label='val_' + item)
def PGNN_train_test(optimizer_name, optimizer_val, pre_train, tr_size, lamda, iteration, n_nodes, n_layers, drop_frac, reg, samp): # Hyper-parameters of the training process # batch_size = int(tr_size/2) batch_size = 1 num_epochs = 300 val_frac = 0.25 patience_val = 80 # Initializing results filename exp_name = "DNN_pre_hyb_" + pre_train + optimizer_name + '_trsize' + str( tr_size) + '_lamda' + str(lamda) + '_iter' + str(iteration) exp_name = exp_name.replace('.', 'pt') results_dir = '../results/' model_name = results_dir + exp_name + '_NoPhyInfomodel.h5' # storing the trained model if reg == True and samp == 25: results_name = results_dir + exp_name + '_results_25_regularizer.dat' # storing the results of the model elif reg == False and samp == 25: results_name = results_dir + exp_name + '_results_25.dat' # storing the results of the model elif reg == True and samp == 1519: results_name = results_dir + exp_name + '_results_1519_regularizer.dat' # storing the results of the model elif reg == False and samp == 1519: results_name = results_dir + exp_name + '_results_1519.dat' # storing the results of the model # Load labeled data data = np.loadtxt('../data/labeled_data.dat') x_label = data[:, : -3] # -2 because we do not need porosity predictions x_labeled = np.hstack((x_label[:, :2], x_label[:, -2:])) y_labeled = data[:, -3:-1] # if samp==25: # data2 = np.loadtxt('../data/unlabeled_data_BK_constw_v2_25.dat') # x_unlabeled = data2[:, :] # elif samp==1519: # data2 = np.loadtxt('../data/unlabeled_data_BK_constw_v2_1525.dat') # data1 = data2[:1303, :] # data2 = data2[-6:, :] # datah = np.vstack((data1,data2)) # # np.random.shuffle(datah) # x_labeled = np.hstack((datah[:, :2],datah[:,-3:-1])) # # x_unlabeled = datah[:, :2] # 1303 last regular sample # y_unlabeled = datah[:, -3:-1] # normalize dataset with MinMaxScaler scaler = preprocessing.MinMaxScaler(feature_range=(0, 1.0)) # scaler = preprocessing.StandardScaler() x_labeled = scaler.fit_transform(x_labeled) # y_labeled = scaler.fit_transform(y_labeled) # train and test data trainX, trainY = x_labeled[:tr_size, :], y_labeled[:tr_size] # testX, testY = x_labeled[tr_size:,:], y_labeled[tr_size:] # init_poro = data[tr_size:, -1] testX, testY = x_labeled[tr_size:, :], y_labeled[tr_size:] init_poro = data[tr_size:, -1] dependencies = {'root_mean_squared_error': root_mean_squared_error} # load the pre-trained model using non-calibrated physics-based model predictions (./data/unlabeled.dat) loaded_model = load_model(results_dir + pre_train, custom_objects=dependencies) # Creating the model model = Sequential() for layer in np.arange(n_layers): if layer == 0: model.add( Dense(n_nodes, activation='relu', input_shape=(np.shape(trainX)[1], ))) else: if reg: model.add( Dense(n_nodes, activation='relu', kernel_regularizer=l1_l2(l1=.001, l2=.001))) else: model.add(Dense(n_nodes, activation='relu')) model.add(Dropout(rate=drop_frac)) model.add(Dense(2, activation='linear')) # pass the weights to all layers but 1st input layer, whose dimensions are updated for new_layer, layer in zip(model.layers[1:], loaded_model.layers[1:]): new_layer.set_weights(layer.get_weights()) model.compile(loss='mean_squared_error', optimizer=optimizer_val, metrics=[root_mean_squared_error]) early_stopping = EarlyStopping(monitor='val_loss', patience=patience_val, verbose=1) print('Running...' 
+ optimizer_name) history = model.fit(trainX, trainY, batch_size=batch_size, epochs=num_epochs, verbose=0, validation_split=val_frac, callbacks=[early_stopping, TerminateOnNaN()]) test_score = model.evaluate(testX, testY, verbose=1) print(test_score) # predictions = model.predict(testX) # # inv_pred = scaler.inverse_transform(predictions) # phyloss1 = bond(predictions[:,0]) # physics loss 1 # # init_poro_ndim = np.ones((init_poro.shape)) # # diff2 = poros(init_poro_ndim, predictions[:,1]) # physics loss 2 # phyloss2 = poros(init_poro, predictions[:,1]) # physics loss 2 # phyloss3 = strength1(predictions[:,0], predictions[:,1]) # phyloss4 = strength2(predictions[:,0], predictions[:,1]) # lam1, lam2 = lamda[0], lamda[1] # phyloss = phy_loss_mean([phyloss1, phyloss2, phyloss3, phyloss4, lam1, lam2]) # print('iter: ' + str(iteration) + # ' nL: ' + str(n_layers) + ' nN: ' + str(n_nodes) + # ' trsize: ' + str(tr_size) + # ' TestRMSE: ' + str(test_score[1]) + ' PhyLoss: ' + str(phyloss), "\n") # # model.save(model_name) # # save results # results = {'train_rmse':history.history['root_mean_squared_error'], # 'val_rmse':history.history['val_root_mean_squared_error'], # 'test_rmse':test_score[1], 'PhyLoss':phyloss} # save_obj(results, results_name) # return results, results_name, predictions, testY, test_score[1] # predictions = model.predict(Xx) samples = [] for i in range(int(nsim)): print("simulation num:", i) predictions = model.predict(Xx) predictions = predictions[:, 1] samples.append(predictions[:, np.newaxis]) return np.array(samples)
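# The function above returns an array of shape (nsim, n_points, 1) built from
# repeated model.predict() calls. A short, illustrative helper for summarising
# those repeated predictions afterwards (not part of the original file):
def summarise_samples(samples):
    samples = np.asarray(samples)        # shape (nsim, n_points, 1)
    mean_pred = samples.mean(axis=0)     # average prediction per test point
    std_pred = samples.std(axis=0)       # spread across the nsim repetitions
    return mean_pred, std_pred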
                             monitor='val_acc', verbose=1, save_best_only=True)
lr_scheduler = LearningRateScheduler(lr_schedule)
lr_reducer = ReduceLROnPlateau(factor=np.sqrt(0.1), cooldown=0, patience=5,
                               min_lr=0.5e-6)
logdir = "logs/" + args.log_dir + "/" + datetime.datetime.now().strftime(
    "%Y%m%d-%H%M%S")
tensorboard_callback = keras.callbacks.TensorBoard(log_dir=logdir)
term = TerminateOnNaN()
callbacks = [checkpoint, lr_reducer, lr_scheduler, tensorboard_callback, term]

# Run training, with or without data augmentation.
if not data_augmentation:
    print('Not using data augmentation.')
    model.fit(x_train, y_train,
              batch_size=batch_size,
              epochs=epochs,
              validation_data=(x_test, y_test),
              shuffle=True,
              callbacks=callbacks)
else:
    print('Using real-time data augmentation.')
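    # The augmentation branch is truncated in this snippet. A typical
    # continuation, loosely following the stock Keras CIFAR-10 example (not
    # the original author's code), would look roughly like this:
    from keras.preprocessing.image import ImageDataGenerator
    datagen = ImageDataGenerator(rotation_range=15,
                                 width_shift_range=0.1,
                                 height_shift_range=0.1,
                                 horizontal_flip=True)
    datagen.fit(x_train)
    model.fit_generator(datagen.flow(x_train, y_train, batch_size=batch_size),
                        validation_data=(x_test, y_test),
                        epochs=epochs,
                        callbacks=callbacks)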
def train():
    """Train Keras NALU model on counting task"""
    model_dir = path.dirname(__file__)
    number_width = 16

    X_train, Y_train = generate_dataset(
        batch_size=2**18,
        multiplier=1,
        number_width=number_width,
    )
    X_validation, Y_validation = generate_dataset(
        batch_size=2**9,
        multiplier=9999,
        number_width=number_width,
    )
    X_test, Y_test = generate_dataset(
        batch_size=2**9,
        multiplier=9999,
        number_width=number_width,
    )

    inputs = Input(shape=(2 * number_width, ))
    hidden = NALU(units=number_width, cell='m')(inputs)
    outputs = NALU(units=1, cell='a')(hidden)

    callbacks = [
        TerminateOnNaN(),
        ReduceLROnPlateau(
            factor=0.1,
            min_lr=1e-16,
            patience=50,
            verbose=1,
        ),
        EarlyStopping(
            patience=200,
            restore_best_weights=True,
            verbose=1,
        ),
    ]

    model = Model(inputs=inputs, outputs=outputs)
    model.summary()
    model.compile(loss='mae', optimizer=RMSprop(lr=0.01))

    model.fit(
        batch_size=256,
        callbacks=callbacks,
        epochs=1000,
        validation_data=(X_validation, Y_validation),
        verbose=2,
        x=X_train,
        y=Y_train,
    )
    model.evaluate(
        batch_size=256,
        verbose=1,
        x=X_test,
        y=Y_test,
    )
    model.save(path.join(model_dir, 'model.h5'))
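# Because the saved model contains custom NALU layers, reloading it later
# needs the layer class passed through custom_objects. A usage sketch, not
# part of the original script; NALU here is the same class imported above.
from keras.models import load_model

reloaded = load_model(path.join(path.dirname(__file__), 'model.h5'),
                      custom_objects={'NALU': NALU})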
def main(job_dir, **args): ##Setting up the path for saving logs logs_dir = job_dir + 'logs/' data_dir = "gs://deeplearningteam11/data" print("Current Directory: " + os.path.dirname(__file__)) print("Lets copy the data to: " + os.path.dirname(__file__)) os.system("gsutil -m cp -r " + data_dir + " " + os.path.dirname(__file__) + " > /dev/null 2>&1 ") #exit(0) with tf.device('/device:GPU:0'): # 1: Build the Keras model. K.clear_session() # Clear previous models from memory. model = ssd_300(image_size=(img_height, img_width, img_channels), n_classes=n_classes, mode='training', l2_regularization=0.0005, scales=scales, aspect_ratios_per_layer=aspect_ratios, two_boxes_for_ar1=two_boxes_for_ar1, steps=steps, offsets=offsets, clip_boxes=clip_boxes, variances=variances, normalize_coords=normalize_coords, subtract_mean=mean_color, swap_channels=swap_channels) # Set the path to the `.h5` file of the model to be loaded. model_file = file_io.FileIO('gs://deeplearningteam11/vgg19BNmodel.h5', mode='rb') # Store model locally on instance model_path = 'model.h5' with open(model_path, 'wb') as f: f.write(model_file.read()) model_file.close() ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0) model = load_model(model_path, custom_objects={ 'AnchorBoxes': AnchorBoxes, 'L2Normalization': L2Normalization, 'DecodeDetections': DecodeDetections, 'compute_loss': ssd_loss.compute_loss }) for layer in model.layers: layer.trainable = True model.summary() # 1: Instantiate two `DataGenerator` objects: One for training, one for validation. train_dataset = DataGenerator(load_images_into_memory=True, hdf5_dataset_path=None) val_dataset = DataGenerator(load_images_into_memory=True, hdf5_dataset_path=None) # 2: Parse the image and label lists for the training and validation datasets. This can take a while. # VOC 2007 # The directories that contain the images. VOC_2007_train_images_dir = 'data/data/VOC2007/train/JPEGImages/' VOC_2007_test_images_dir = 'data/data/VOC2007/test/JPEGImages/' VOC_2007_train_anns_dir = 'data/data/VOC2007/train/Annotations/' VOC_2007_test_anns_dir = 'data/data/VOC2007/test/Annotations/' # The paths to the image sets. VOC_2007_trainval_image_set_dir = 'data/data/VOC2007/train/ImageSets/Main/' VOC_2007_test_image_set_dir = 'data/data/VOC2007/test/ImageSets/Main/' VOC_2007_train_images_dir = os.path.dirname( __file__) + "/" + VOC_2007_train_images_dir VOC_2007_test_images_dir = os.path.dirname( __file__) + "/" + VOC_2007_test_images_dir VOC_2007_train_anns_dir = os.path.dirname( __file__) + "/" + VOC_2007_train_anns_dir VOC_2007_test_anns_dir = os.path.dirname( __file__) + "/" + VOC_2007_test_anns_dir VOC_2007_trainval_image_set_dir = os.path.dirname( __file__) + "/" + VOC_2007_trainval_image_set_dir VOC_2007_test_image_set_dir = os.path.dirname( __file__) + "/" + VOC_2007_test_image_set_dir VOC_2007_trainval_image_set_filename = VOC_2007_trainval_image_set_dir + '/trainval.txt' VOC_2007_test_image_set_filename = VOC_2007_test_image_set_dir + '/test.txt' # The XML parser needs to now what object class names to look for and in which order to map them to integers. 
classes = [ 'background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor' ] print("Parsing Training Data ...") train_dataset.parse_xml( images_dirs=[VOC_2007_train_images_dir], image_set_filenames=[VOC_2007_trainval_image_set_filename], annotations_dirs=[VOC_2007_train_anns_dir], classes=classes, include_classes='all', exclude_truncated=False, exclude_difficult=False, ret=False, verbose=False) print("Done") print( "================================================================") print("Parsing Test Data ...") val_dataset.parse_xml( images_dirs=[VOC_2007_test_images_dir], image_set_filenames=[VOC_2007_test_image_set_filename], annotations_dirs=[VOC_2007_test_anns_dir], classes=classes, include_classes='all', exclude_truncated=False, exclude_difficult=True, ret=False, verbose=False) print("Done") print( "================================================================") # 3: Set the batch size. batch_size = 32 # Change the batch size if you like, or if you run into GPU memory issues. # 4: Set the image transformations for pre-processing and data augmentation options. # For the training generator: ssd_data_augmentation = SSDDataAugmentation(img_height=img_height, img_width=img_width, background=mean_color) # For the validation generator: convert_to_3_channels = ConvertTo3Channels() resize = Resize(height=img_height, width=img_width) # 5: Instantiate an encoder that can encode ground truth labels into the format needed by the SSD loss function. # The encoder constructor needs the spatial dimensions of the model's predictor layers to create the anchor boxes. predictor_sizes = [ model.get_layer('conv4_4_norm_mbox_conf').output_shape[1:3], model.get_layer('fc7_mbox_conf').output_shape[1:3], model.get_layer('conv8_2_mbox_conf').output_shape[1:3], model.get_layer('conv9_2_mbox_conf').output_shape[1:3], model.get_layer('conv10_2_mbox_conf').output_shape[1:3], model.get_layer('conv11_2_mbox_conf').output_shape[1:3] ] ssd_input_encoder = SSDInputEncoder( img_height=img_height, img_width=img_width, n_classes=n_classes, predictor_sizes=predictor_sizes, scales=scales, aspect_ratios_per_layer=aspect_ratios, two_boxes_for_ar1=two_boxes_for_ar1, steps=steps, offsets=offsets, clip_boxes=clip_boxes, variances=variances, matching_type='multi', pos_iou_threshold=0.5, neg_iou_limit=0.5, normalize_coords=normalize_coords) # 6: Create the generator handles that will be passed to Keras' `fit_generator()` function. train_generator = train_dataset.generate( batch_size=batch_size, shuffle=True, transformations=[ssd_data_augmentation], label_encoder=ssd_input_encoder, returns={'processed_images', 'encoded_labels'}, keep_images_without_gt=False) val_generator = val_dataset.generate( batch_size=batch_size, shuffle=False, transformations=[convert_to_3_channels, resize], label_encoder=ssd_input_encoder, returns={'processed_images', 'encoded_labels'}, keep_images_without_gt=False) # Get the number of samples in the training and validations datasets. train_dataset_size = train_dataset.get_dataset_size() val_dataset_size = val_dataset.get_dataset_size() print("Number of images in the training dataset:\t{:>6}".format( train_dataset_size)) print("Number of images in the validation dataset:\t{:>6}".format( val_dataset_size)) # Define a learning rate schedule. 
        def lr_schedule(epoch):
            return 1e-6
            # if epoch < 80:
            #     return 0.001
            # elif epoch < 100:
            #     return 0.0001
            # else:
            #     return 0.00001

        learning_rate_scheduler = LearningRateScheduler(schedule=lr_schedule,
                                                        verbose=1)
        terminate_on_nan = TerminateOnNaN()
        callbacks = [learning_rate_scheduler, terminate_on_nan]

        # If you're resuming a previous training, set `initial_epoch` and `final_epoch` accordingly.
        initial_epoch = 120
        final_epoch = 200
        steps_per_epoch = 500

        history = model.fit_generator(generator=train_generator,
                                      steps_per_epoch=steps_per_epoch,
                                      epochs=final_epoch,
                                      callbacks=callbacks,
                                      validation_data=val_generator,
                                      validation_steps=ceil(val_dataset_size / batch_size),
                                      initial_epoch=initial_epoch)

        model_name = "vgg19BNmodel_cont.h5"
        model.save(model_name)

        with file_io.FileIO(model_name, mode='rb') as input_f:
            with file_io.FileIO("gs://deeplearningteam11/" + model_name,
                                mode='w+') as output_f:
                output_f.write(input_f.read())
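        # steps_per_epoch is fixed to 500 above, so each "epoch" covers roughly
        # 500 * batch_size images regardless of dataset size. To make one epoch
        # correspond to one full pass over the training set, it could instead be
        # derived from the dataset size (illustrative alternative, not the
        # original setting):
        steps_per_epoch = ceil(train_dataset_size / batch_size)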
def main(): if 'test' not in config: logging.warning('Path to validation set does not exist') quit() path = config['train'] file_list = glob.glob(os.path.join(path, '*')) logging.info('number of h5 files: {}'.format(len(file_list))) if mode == 0: module_network = os.getcwd().replace('bin', 'network') sys.path.append(module_network) from network.convlstm import AudioFeat from network.convlstm import Music2dance module_utils = os.getcwd().replace('bin', 'utils') sys.path.append(module_utils) from utils.dataset import DataGenerator folder_models = os.path.join(args.out, 'models') train_dataset = DataGenerator(path, args.batch, args.sequence, 'train', args.init_step, shuffle=True) batch_0 = train_dataset[270] input_encoder_shape = batch_0[0][0].shape[1:] output_shape = batch_0[1][1].shape[1] if not os.path.exists(folder_models): os.makedirs(folder_models) os.makedirs(os.path.join(folder_models, 'logs')) model = Music2dance(input_encoder_shape, output_shape, feat_dim=60, units=100, batchsize=args.batch) model.build(batch_0[0][0].shape) model._set_inputs(inputs=batch_0[0]) optimizer = optimizers.adam(lr=args.base_lr) model.compile(loss=losses.mean_squared_error, optimizer=optimizer) #plot_model(model, show_layer_names=True, show_shapes=True, to_file=os.path.join(args.out, 'model.png')) print(model.summary()) model_saver = ModelCheckpoint(filepath=os.path.join( folder_models, 'model.ckpt.{epoch:04d}.hdf5'), verbose=1, save_best_only=False, save_weights_only=True, mode='auto', period=1) '''def lr_scheduler(epoch, lr): decay_rate = 0.90 decay_step = 20 if epoch % decay_step == 0 and epoch: return lr * decay_rate return lr''' def lr_scheduler(epoch): if epoch < 10: return args.base_lr else: return args.base_lr * np.exp(0.05 * (10 - epoch)) logs = os.path.join(folder_models, 'logs/{}'.format(time())) tensorboard = TensorBoard(log_dir=logs) callbacks_list = [ model_saver, TerminateOnNaN(), LearningRateScheduler(lr_scheduler, verbose=1), tensorboard ] if args.validation_set: validation_path = config['test'] test_dataset = DataGenerator(validation_path, args.batch, args.sequence, 'test', args.init_step, shuffle=True) history = model.fit_generator( train_dataset, validation_data=test_dataset, epochs=args.epochs, use_multiprocessing=args.multiprocessing, workers=args.workers, callbacks=callbacks_list, verbose=args.verbose) else: history = model.fit_generator( train_dataset, epochs=args.epochs, use_multiprocessing=args.multiprocessing, workers=args.workers, callbacks=callbacks_list, verbose=args.verbose) model.save_weights(os.path.join(args.out, 'models', 'model.h5')) with open(os.path.join(args.out, 'trainHistoryDict'), 'wb') as file_pi: pickle.dump(history.history, file_pi) def plot_loss(hist, save): # Plot training & validation loss values plt.plot(hist.history['loss']) if 'val_loss' in hist.history.keys(): plt.plot(hist.history['val_loss']) plt.title('Model loss') plt.ylabel('Loss') plt.xlabel('Epoch') plt.legend(['Train', 'Test'], loc='upper left') plt.savefig(os.path.join(save, 'loss_values.png')) plot_loss(history, args.out) elif mode == 1: module_network = os.getcwd().replace('bin', 'network') sys.path.append(module_network) from network.convLSTM2dMoldel2 import ConvLSTM2dModel module_utils = os.getcwd().replace('bin', 'utils') sys.path.append(module_utils) from utils.dataset2 import DataGenerator2 train_dataset = DataGenerator2(path, args.batch, args.sequence, args.sequence_out, 'train', args.init_step, shuffle=True) batch_0 = train_dataset[270] input_encoder_shape = batch_0[0][0].shape[1:] 
input_decoder_shape = batch_0[0][1].shape[1:] output_shape = batch_0[1].shape[1:] folder_models = os.path.join(args.out, 'models') if not os.path.exists(folder_models): os.makedirs(folder_models) model = ConvLSTM2dModel(input_encoder_shape, output_shape, args.base_lr) plot_model(model, show_layer_names=True, show_shapes=True, to_file=os.path.join(args.out, 'model.png')) model_saver = ModelCheckpoint(filepath=os.path.join( folder_models, 'model.ckpt.{epoch:04d}.hdf5'), verbose=1, save_best_only=True, save_weights_only=True, mode='auto', period=1) print(model.summary()) '''def lr_scheduler(epoch, lr): decay_rate = 0.90 decay_step = 20 if epoch % decay_step == 0 and epoch: return lr * decay_rate return lr''' def lr_scheduler(epoch): if epoch < 10: return args.base_lr else: return args.base_lr * np.exp(0.1 * (10 - epoch)) callbacks_list = [ model_saver, TerminateOnNaN(), LearningRateScheduler(lr_scheduler, verbose=1) ] if args.validation_set: validation_path = config['test'] test_dataset = DataGenerator2(validation_path, args.batch, args.sequence, args.sequence_out, 'test', args.init_step, shuffle=True) history = model.fit_generator( train_dataset, validation_data=test_dataset, epochs=args.epochs, use_multiprocessing=args.multiprocessing, workers=args.workers, callbacks=callbacks_list, verbose=args.verbose) else: history = model.fit_generator( train_dataset, epochs=args.epochs, use_multiprocessing=args.multiprocessing, workers=args.workers, callbacks=callbacks_list, verbose=args.verbose) model.save_weights(os.path.join(args.out, 'models', 'model.h5')) with open(os.path.join(args.out, 'trainHistoryDict'), 'wb') as file_pi: pickle.dump(history.history, file_pi) def plot_loss(hist, save): # Plot training & validation loss values plt.plot(hist.history['loss']) if 'val_loss' in hist.history.keys(): plt.plot(hist.history['val_loss']) plt.title('Model loss') plt.ylabel('Loss') plt.xlabel('Epoch') plt.legend(['Train', 'Test'], loc='upper left') plt.savefig(os.path.join(save, 'loss_values.png')) plot_loss(history, args.out)
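# A quick illustration of the exponential learning-rate schedules used in the
# two branches above (not part of the original script): the rate is held at
# base_lr for the first 10 epochs and then decays smoothly. base_lr is an
# illustrative value; the real one comes from args.base_lr.
import numpy as np

base_lr = 1e-3
for epoch in (0, 10, 20, 30):
    lr_mode0 = base_lr if epoch < 10 else base_lr * np.exp(0.05 * (10 - epoch))
    lr_mode1 = base_lr if epoch < 10 else base_lr * np.exp(0.1 * (10 - epoch))
    print(epoch, round(lr_mode0, 6), round(lr_mode1, 6))
# e.g. at epoch 20: base_lr * exp(-0.5) ~= 0.61 * base_lr (mode 0),
#                   base_lr * exp(-1.0) ~= 0.37 * base_lr (mode 1)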