def main(experiment_config, experiment_results_dir):
    """Train a CSVTrainer model on a single domain and evaluate it on the
    test split.

    Returns the trainer with `histories[domain]` and `test_results`
    populated, and the final model saved under the trainer's model dir.
    """
    domain = experiment_config.domain

    # Label encoder comes from the mapping file declared in the config.
    encoder = LabelEncoder(filepath=experiment_config['label_mappings'])
    print(encoder)

    trainer = CSVTrainer(experiment_config, label_encoder=encoder)
    trainer.init_model_builder()

    model_dir = trainer.model_manager.model_dir
    model_filepath = os.path.join(
        model_dir, trainer.model_name + '_' + domain + '_model.h5')

    # One loader per split.
    train_data = trainer.get_data_loader(subset='train')
    val_data = trainer.get_data_loader(subset='val')
    test_data = trainer.get_data_loader(subset='test')

    # Fit parameters plus callbacks (best-weights checkpoint + TensorBoard).
    fit_params = trainer.get_fit_params()
    weights_best_path = os.path.join(
        model_dir, trainer.model_name + '_' + domain + '_model_weights_best.h5')
    callbacks = get_callbacks(
        weights_best=weights_best_path,
        logs_dir=os.path.join(experiment_results_dir, 'tensorboard_logs'),
        restore_best_weights=True)

    history = trainer.fit(train_data,
                          steps_per_epoch=fit_params['steps_per_epoch'],
                          epochs=fit_params['epochs'],
                          validation_data=val_data,
                          validation_steps=fit_params['validation_steps'],
                          callbacks=callbacks)
    trainer.histories[domain] = history
    trainer.save_model(filepath=model_filepath)

    # Evaluate on the test split using whole batches only.
    num_test_samples = trainer.metadata_splits['test']['num_samples']
    num_steps = num_test_samples // trainer.config['batch_size']
    trainer.test_results = [
        trainer.evaluate(test_data, steps=num_steps, log_name='test')
    ]
    return trainer
def main(experiment_config, experiment_dir):
    """Build and train a model selected by `experiment_config.model_name`
    using a BaseTrainer, returning the trainer with its history attached.

    Returns None for the (unfinished) 'resnet' path.

    BUGFIX: the original fell through with `model` unbound when the model
    name matched neither 'vgg16' nor 'resnet', producing an
    UnboundLocalError at `model.fit`; an explicit else branch now raises
    ValueError with a clear message.
    """
    trainer = BaseTrainer(experiment_config=experiment_config)

    train_data = trainer.get_data_loader(subset='train')
    val_data = trainer.get_data_loader(subset='val')
    # Test loader is built for parity with the other runners; it is not
    # consumed in this function.
    test_data = trainer.get_data_loader(subset='test')

    model_params = trainer.get_model_config('train')
    fit_params = trainer.get_fit_params()
    callbacks = get_callbacks(
        weights_best=os.path.join(experiment_dir, 'weights_best.h5'),
        logs_dir=os.path.join(experiment_dir, 'tensorboard_logs'),
        restore_best_weights=False,
        val_data=None)
    print('model_params', model_params)

    if 'vgg16' in experiment_config.model_name:
        model_builder = VGG16GrayScale(model_params)
        model = model_builder.build_model()
    elif 'resnet' in experiment_config.model_name:
        print('ResNet still in progress')
        return None
    else:
        # BUGFIX: previously this path left `model` undefined.
        raise ValueError(
            f'Unsupported model_name: {experiment_config.model_name}')

    history = model.fit(train_data,
                        steps_per_epoch=fit_params['steps_per_epoch'],
                        epochs=fit_params['epochs'],
                        validation_data=val_data,
                        validation_steps=fit_params['validation_steps'],
                        callbacks=callbacks)

    # Record the effective configuration alongside the training history.
    trainer.config['model_config'] = model_params
    trainer.config.train_config['fit_params'] = fit_params
    trainer.history = history
    return trainer
def main(experiment_config, experiment_dir):
    """Train the trainer's own model (built via `init_model_builder`) and
    return the trainer with its fit history attached.
    """
    trainer = BaseTrainer(experiment_config=experiment_config)

    # One loader per split; the test loader is created but not used here.
    train_data = trainer.get_data_loader(subset='train')
    val_data = trainer.get_data_loader(subset='val')
    test_data = trainer.get_data_loader(subset='test')

    trainer.init_model_builder()

    fit_params = trainer.get_fit_params()
    callbacks = get_callbacks(
        weights_best=os.path.join(experiment_dir, 'weights_best.h5'),
        logs_dir=os.path.join(experiment_dir, 'tensorboard_logs'),
        restore_best_weights=False,
        val_data=None)

    # Fit the model owned by the trainer and stash the history on it.
    trainer.history = trainer.model.fit(
        train_data,
        steps_per_epoch=fit_params['steps_per_epoch'],
        epochs=fit_params['epochs'],
        validation_data=val_data,
        validation_steps=fit_params['validation_steps'],
        callbacks=callbacks)
    return trainer
def main(experiment_configs, experiment_results_dir):
    """Transfer-learning run: train on the source domain, then evaluate
    (zero-shot) and fine-tune on the target domain.

    Returns the TransferTrainer with `histories['source'|'target']` and
    `test_results` populated.
    """
    # One LabelEncoder per domain, built from each config's mapping file.
    label_encoders = {}
    for idx, domain in enumerate(['source', 'target']):
        mapping_path = experiment_configs[idx]['label_mappings']
        label_encoders[domain] = LabelEncoder(filepath=mapping_path)
        print(domain, len(label_encoders[domain]))

    trainer = TransferTrainer(experiment_configs,
                              trainer_constructor=CSVTrainer,
                              label_encoders=label_encoders)
    trainer.init_model_builder(domain='source')

    model_dir = trainer.model_manager.model_dir
    source_model_filepath = os.path.join(
        model_dir, trainer.model_name + '_source_model.h5')
    # Target path is computed for symmetry; it is not written in this run.
    target_model_filepath = os.path.join(
        model_dir, trainer.model_name + '_target_model.h5')

    # ----- source domain: train -----
    source_train_data = trainer.get_data_loader(domain='source', subset='train')
    source_val_data = trainer.get_data_loader(domain='source', subset='val')
    fit_params = trainer.get_fit_params(domain='source')
    callbacks = get_callbacks(
        weights_best=os.path.join(model_dir, 'source_domain_weights_best.h5'),
        logs_dir=os.path.join(experiment_results_dir, 'tensorboard_logs'),
        restore_best_weights=True)

    trainer.histories['source'] = trainer.fit(
        source_train_data,
        steps_per_epoch=fit_params['steps_per_epoch'],
        epochs=fit_params['epochs'],
        validation_data=source_val_data,
        validation_steps=fit_params['validation_steps'],
        callbacks=callbacks,
        history_name='source')
    trainer.save_model(filepath=source_model_filepath)

    # ----- target domain: zero-shot eval, then fine-tune -----
    trainer.load_model(filepath=source_model_filepath)
    target_train_data = trainer.get_data_loader(domain='target', subset='train')
    target_val_data = trainer.get_data_loader(domain='target', subset='val')
    target_test_data = trainer.get_data_loader(domain='target', subset='test')
    fit_params = trainer.get_fit_params(domain='target')
    callbacks = get_callbacks(
        weights_best=os.path.join(model_dir, 'target_domain_weights_best.h5'),
        logs_dir=os.path.join(experiment_results_dir, 'tensorboard_logs'),
        restore_best_weights=True)

    # Evaluate with whole batches only.
    target_domain = trainer.domains['target']
    num_test_samples = target_domain.metadata_splits['test']['num_samples']
    num_steps = num_test_samples // target_domain.config['batch_size']

    test_results = [
        trainer.evaluate(target_test_data,
                         steps=num_steps,
                         log_name='0-shot_test')
    ]

    trainer.histories['target'] = trainer.fit(
        target_train_data,
        steps_per_epoch=fit_params['steps_per_epoch'],
        epochs=fit_params['epochs'],
        validation_data=target_val_data,
        validation_steps=fit_params['validation_steps'],
        callbacks=callbacks,
        history_name='target')

    test_results.append(
        trainer.evaluate(target_test_data,
                         steps=num_steps,
                         log_name='test_acc'))
    trainer.test_results = test_results
    return trainer
def main():
    """Command-line entry point: select the GPU, parse the run config from
    sys.argv, build the data/model/logging stack, then train, save, and test.

    Note: os.environ['CUDA_VISIBLE_DEVICES'] must be set *before* TensorFlow
    is imported, which is why imports are deliberately interleaved with the
    GPU selection below.

    BUGFIX: the config-parse failure path previously called exit(0), which
    reported success to the shell; it now exits with status 1. Also removed
    unused imports (random, stuf, json) and a duplicate `import numpy as np`.
    """
    from pprint import pprint
    import sys
    import os
    import numpy as np

    # --gpu N selects the visible CUDA device (default 0).
    gpu = 0
    if '--gpu' in sys.argv:
        gpu = int(sys.argv[sys.argv.index('--gpu') + 1])
        print('--gpu ', gpu)
    os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu)
    print('os.environ["CUDA_VISIBLE_DEVICES"] : ',
          os.environ["CUDA_VISIBLE_DEVICES"])

    import tensorflow as tf
    # tf.compat.v1.enable_eager_execution()
    print('tf.executing_eagerly()', tf.executing_eagerly())

    from pyleaves.configs.config_v2 import BaseConfig
    try:
        config = BaseConfig().parse(args=sys.argv[1:])
    except Exception as e:
        print(e)
        print("missing or invalid arguments")
        print('sys.argv = ', sys.argv[1:])
        # BUGFIX: was exit(0) — a failure must not signal success.
        sys.exit(1)

    from pyleaves.leavesdb.tf_utils.tf_utils import set_random_seed
    set_random_seed(config.seed)

    from pyleaves.base.base_data_manager import DataManager
    from pyleaves.base.base_trainer import ModelBuilder, BaseTrainer
    from pyleaves.train.callbacks import get_callbacks
    from pyleaves.loggers.mlflow_logger import MLFlowLogger as Logger

    data_manager = DataManager(config=config)
    train_data = data_manager.get_data_loader(file_group='train')
    val_data = data_manager.get_data_loader(file_group='val')
    test_data = data_manager.get_data_loader(file_group='test')

    model_builder = ModelBuilder(config)
    callbacks = get_callbacks(
        weights_best=os.path.join(config.model_config.model_dir,
                                  'weights_best.h5'),
        logs_dir=os.path.join(config.model_config.log_dir, 'tensorboard_logs'),
        val_data=val_data,
        batches_per_epoch=0,  #30,
        freq=0,  #5,
        histogram_freq=0,
        restore_best_weights=True,
        seed=config.seed)
    logger = Logger(config)
    trainer = BaseTrainer(config, model_builder, data_manager, logger,
                          callbacks)

    pprint(config)
    print('INITIATING TRAINING')

    # NOTE(review): normalized class weights are computed but training is
    # invoked with class_weights=None (a deliberate-looking toggle kept from
    # the original) — confirm which behavior is intended.
    class_weights = trainer.class_weights
    class_weights = class_weights.assign(
        y=class_weights['y'] / np.max(trainer.class_weights['y']))
    trainer.train(class_weights=None)  #class_weights)

    trainer.save_model(config.run_id + '_model')
    trainer.test()
def main(experiment_configs, experiment_dir):
    """Two-stage transfer experiment: fit on the source domain, then
    evaluate and fine-tune on the target domain.

    Returns the TransferTrainer with `histories` and `test_results`
    populated.
    """
    trainer = TransferTrainer(
        experiment_configs,
        src_db=os.path.join(pyleaves.RESOURCES_DIR, 'updated_leavesdb.db'))
    trainer.init_model_builder(domain='source')

    model_dir = trainer.model_manager.model_dir
    source_model_filepath = os.path.join(
        model_dir, trainer.model_name + '_source_model.h5')
    # Computed for symmetry; this run never writes the target model file.
    target_model_filepath = os.path.join(
        model_dir, trainer.model_name + '_target_model.h5')

    # ----- source domain: train -----
    source_train_data = trainer.get_data_loader(domain='source', subset='train')
    source_val_data = trainer.get_data_loader(domain='source', subset='val')
    fit_params = trainer.get_fit_params(domain='source')
    callbacks = get_callbacks(
        weights_best=os.path.join(experiment_dir,
                                  'source_domain_weights_best.h5'),
        logs_dir=os.path.join(experiment_dir, 'tensorboard_logs'),
        restore_best_weights=True)

    trainer.histories['source'] = trainer.fit(
        source_train_data,
        steps_per_epoch=fit_params['steps_per_epoch'],
        epochs=fit_params['epochs'],
        validation_data=source_val_data,
        validation_steps=fit_params['validation_steps'],
        callbacks=callbacks,
        history_name='source')
    trainer.save_model(filepath=source_model_filepath)

    # ----- target domain: zero-shot eval, then fine-tune -----
    # The saved source model is intentionally NOT reloaded here; fine-tuning
    # continues from the in-memory weights.
    #trainer.load_model(filepath=source_model_filepath)
    target_train_data = trainer.get_data_loader(domain='target', subset='train')
    target_val_data = trainer.get_data_loader(domain='target', subset='val')
    target_test_data = trainer.get_data_loader(domain='target', subset='test')
    fit_params = trainer.get_fit_params(domain='target')
    callbacks = get_callbacks(
        weights_best=os.path.join(experiment_dir,
                                  'target_domain_weights_best.h5'),
        logs_dir=os.path.join(experiment_dir, 'tensorboard_logs'),
        restore_best_weights=True)

    # Evaluate with whole batches only.
    target_domain = trainer.domains['target']
    num_test_samples = target_domain.metadata_splits['test']['num_samples']
    num_steps = num_test_samples // target_domain.config['batch_size']

    test_results = [
        trainer.evaluate(
            target_test_data,
            steps=num_steps,
            log_name='trained:[source_train],evaluate:[target_test]')
    ]

    trainer.histories['target'] = trainer.fit(
        target_train_data,
        steps_per_epoch=fit_params['steps_per_epoch'],
        epochs=fit_params['epochs'],
        validation_data=target_val_data,
        validation_steps=fit_params['validation_steps'],
        callbacks=callbacks,
        history_name='target')

    test_results.append(
        trainer.evaluate(
            target_test_data,
            steps=num_steps,
            log_name=
            'trained:[source_train+target_train],evaluate:[target_test]'))
    trainer.test_results = test_results
    return trainer
train_data = trainer.get_data_loader( subset='train') #, skip_preprocessing=True) val_data = trainer.get_data_loader(subset='train') #'val') test_data = trainer.get_data_loader(subset='test') model_params = trainer.get_model_params('train') fit_params = trainer.get_fit_params() with mlflow.start_run( run_name= f'tfds-{args.model_name}-{args.dataset_name}-lr_{args.base_learning_rate}_baseline', nested=True): mlflow.tensorflow.autolog() callbacks = get_callbacks( weights_best=os.path.join(experiment_dir, 'weights_best.h5'), logs_dir=os.path.join(experiment_dir, 'logdir'), restore_best_weights=False) print('model_params', model_params) model = build_model( **model_params ) #name='shallow', num_classes=10000, frozen_layers=(0,-4), input_shape=(224,224,3), base_learning_rate=0.0001) history = model.fit(train_data, steps_per_epoch=fit_params['steps_per_epoch'], epochs=fit_params['epochs'], validation_data=val_data, validation_steps=fit_params['validation_steps'], callbacks=callbacks)
def main(dataset_name='PNAS',
         model_name='vgg16',
         experiment_dir=r'/media/data/jacob/Fossil_Project/vgg16/PNAS',
         gpu_ids=None,
         tfrecord_root_dir=r'/media/data/jacob/Fossil_Project/tfrecord_data',
         batch_size=64,
         target_size=(224, 224),
         base_learning_rate=0.001,
         num_epochs=100,
         preprocessing='imagenet',
         augment_images=False):
    """Configure a dataset/train experiment, then build and fit a model,
    returning the Keras training history.

    BUGFIXES relative to the original:
    - `hp.Float(..., max__value=0.01, ...)` had a double underscore, which
      would raise TypeError; corrected to `max_value`.
    - The RandomSearch call referenced undefined names `seed` and `kwargs`
      (NameError); the seed is now the same one given to TrainConfig and the
      stray `**kwargs` is dropped.
    - Mutable default argument `gpu_ids=[0]` replaced with a None sentinel
      (gpu_ids is not otherwise used here, so behavior is unchanged).
    """
    if gpu_ids is None:
        gpu_ids = [0]

    seed = 3  # single seed shared by TrainConfig and the hyperparameter search

    reset_eager_session()
    # tf.reset_default_graph()

    dataset_config = DatasetConfig(dataset_name=dataset_name,
                                   label_col='family',
                                   target_size=target_size,
                                   channels=3,
                                   low_class_count_thresh=3,
                                   data_splits={
                                       'val_size': 0.2,
                                       'test_size': 0.2
                                   },
                                   tfrecord_root_dir=tfrecord_root_dir,
                                   num_shards=10)

    train_config = TrainConfig(model_name=model_name,
                               batch_size=batch_size,
                               frozen_layers=(0, -4),
                               base_learning_rate=base_learning_rate,
                               buffer_size=1000,
                               num_epochs=num_epochs,
                               preprocessing=preprocessing,
                               augment_images=augment_images,
                               seed=seed)

    experiment_config = ExperimentConfig(dataset_config=dataset_config,
                                         train_config=train_config)

    def build_tunable_model(hp):
        # One trial model per hyperparameter sample drawn by the tuner.
        return build_model(
            name=hp.Choice('name',
                           values=[
                               'shallow', 'vgg16', 'xception', 'resnet_50_v2',
                               'resnet_101_v2'
                           ]),
            num_classes=10000,
            frozen_layers=(0, -4),
            input_shape=(224, 224, 3),
            # BUGFIX: was `max__value` (double underscore) -> TypeError.
            base_learning_rate=hp.Float('base_learning_rate',
                                        min_value=1e-6,
                                        max_value=0.01,
                                        sampling='log'))

    # NOTE(review): the tuner instance is created but never run or stored —
    # presumably leftover scaffolding; confirm whether a `tuner.search(...)`
    # call was intended here.
    RandomSearch(build_tunable_model,
                 objective='val_accuracy',
                 max_trials=num_epochs,
                 seed=seed,
                 hyperparameters=None,
                 tune_new_entries=True,
                 allow_new_entries=True)

    trainer = BaseTrainer(experiment_config=experiment_config)
    train_data = trainer.get_data_loader(subset='train')
    val_data = trainer.get_data_loader(subset='val')
    test_data = trainer.get_data_loader(subset='test')

    model_params = trainer.get_model_params('train')
    fit_params = trainer.get_fit_params()
    callbacks = get_callbacks(
        weights_best=os.path.join(experiment_dir, 'weights_best.h5'),
        logs_dir=os.path.join(experiment_dir, 'logdir'),
        restore_best_weights=False)

    model = build_model(
        **model_params
    )  #name='shallow', num_classes=10000, frozen_layers=(0,-4), input_shape=(224,224,3), base_learning_rate=0.0001
    history = model.fit(train_data,
                        steps_per_epoch=fit_params['steps_per_epoch'],
                        epochs=fit_params['epochs'],
                        validation_data=val_data,
                        validation_steps=fit_params['validation_steps'],
                        callbacks=callbacks)
    return history