def restore_or_initialize_experiment(cfg, restore_last=False, prefix='log_dir__', verbose=0):
    # date_format = '%Y-%m-%d_%H-%M-%S'  # defined at module level
    cfg = copy.deepcopy(cfg)
    cfg.experiment.experiment_name = '_'.join([cfg.stage_0.dataset.dataset_name,
                                               cfg.stage_0.model.model_name])
    cfg.experiment.experiment_dir = os.path.join(cfg.experiment.neptune_experiment_dir,
                                                 cfg.experiment.experiment_name)
    ensure_dir_exists(cfg.experiment.experiment_dir)

    if restore_last:
        experiment_files = [(exp_name.split(prefix)[-1], exp_name)
                            for exp_name in os.listdir(cfg.experiment.experiment_dir)]
        keep_files = []
        for exp in experiment_files:
            try:
                keep_files.append((datetime.strptime(exp[0], date_format), exp[1]))
                if verbose >= 1:
                    print(f'Found previous experiment {exp[1]}')
            except ValueError:
                if verbose >= 2:
                    print(f'Skipping invalid file {exp[1]}')
        experiment_files = sorted(keep_files, key=lambda exp: exp[0])
        if len(experiment_files) > 0:
            experiment_file = experiment_files[-1]
            cfg.experiment.experiment_start_time = experiment_file[0].strftime(date_format)
            initialize_experiment(cfg, experiment_start_time=cfg.experiment.experiment_start_time)
            if verbose >= 1:
                print('Continuing experiment with start time =', cfg.experiment.experiment_start_time)
            return cfg
        print('No previous experiment in', cfg.experiment.experiment_dir, 'with prefix', prefix)

    cfg.experiment.experiment_start_time = datetime.now().strftime(date_format)
    initialize_experiment(cfg, experiment_start_time=cfg.experiment.experiment_start_time)
    if verbose >= 1:
        print('Initializing new experiment at time:', cfg.experiment.experiment_start_time)
    return cfg
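# Usage sketch (illustrative, not part of the original source): resuming the most
# recent run when one exists. Assumes the module-level date_format string and an
# OmegaConf-style cfg with the experiment/stage_0 fields used above; the loader
# name is hypothetical.
#
#     cfg = load_config('config.yaml')  # hypothetical config loader
#     cfg = restore_or_initialize_experiment(cfg, restore_last=True, verbose=1)
#     print('run started at:', cfg.experiment.experiment_start_time)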
def test_db_dask_validate_images(db_path=None):
    ensure_dir_exists(os.path.dirname(VALIDATION_RESULTS_PATH))
    if os.path.isfile(VALIDATION_RESULTS_PATH):
        os.remove(VALIDATION_RESULTS_PATH)
    if db_path is None:
        db_path = leavesdb.db_utils.init_local_db()
    db_URI = f'sqlite:///{db_path}'
    db = dataset.connect(db_URI)
    data = pd.DataFrame(db['dataset'].all())
    data_by_dataset = data.groupby(by='dataset')
    data_by_dataset_dict = {k: v for k, v in data_by_dataset}
    # client = Client(threads_per_worker=10, n_workers=1)

    validated_datasets = pd.DataFrame(columns=['image_id', 'dataset', 'path', 'valid'])
    validated_datasets.to_csv(VALIDATION_RESULTS_PATH, mode='w', index=False)

    batch_size = 64
    dataset_name = 'Leaves'
    dataset_rows = data_by_dataset_dict['Leaves'][:2000]
    if True:  # for dataset_name, dataset_rows in data_by_dataset:
        # if dataset_name == 'Fossil':
        #     continue
        print('validating dataset: ', dataset_name, ', shape=', dataset_rows.shape)
        dd = dask.delayed
        image_paths = list(dataset_rows['path'])
        num_paths = len(image_paths)
        image_ids = list(range(num_paths))
        testing_data = [dd(row) for row in zip(image_paths, image_ids)]
        # testing_data = list(zip(image_paths, image_ids))
        # validate = dask.delayed(dask_validate_image)
        # results = [validate(row[0], row[1], dataset_name) for row in testing_data]
        chunked_data = list(chunked(testing_data, batch_size))
        testing_data = chunked_data  # dask.delayed(chunked_data, nout=len(chunked_data))
        # testing_data = dask.delayed(list(chunked(testing_data, batch_size)))
        validate = dask.delayed(dask_validate_batch, nout=1)
        results = [validate(row, dataset_name) for row in testing_data]
        with ProgressBar():
            validated_results = dask.compute(*results)
        validated_data = pd.DataFrame(collapse(validated_results, levels=1))
        validated_data.columns = ['image_id', 'dataset', 'path', 'valid']
        validated_datasets = pd.concat([validated_datasets, validated_data], ignore_index=True)
        validated_data.to_csv(VALIDATION_RESULTS_PATH, mode='a', index=False)
    return validated_datasets
def save_label_encodings_to_file(encoding_dict, filepath):
    base_dir = os.path.dirname(filepath)
    ensure_dir_exists(base_dir)
    if filepath.endswith('json'):
        with open(filepath, 'w') as file:
            json.dump(encoding_dict, file)
    elif filepath.endswith('csv'):
        data = pd.DataFrame(list(encoding_dict.values()), index=list(encoding_dict.keys()))
        data.to_csv(filepath)
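# Usage sketch (illustrative, not part of the original source): saving a small
# label -> index encoding in both supported formats. Paths and family names here
# are hypothetical; only the function above and its os/json/pandas imports are assumed.
encoding = {'Fagaceae': 0, 'Lauraceae': 1, 'Salicaceae': 2}
save_label_encodings_to_file(encoding, '/tmp/encodings/labels.json')
save_label_encodings_to_file(encoding, '/tmp/encodings/labels.csv')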
def __init__(self, config, csv_dir):
    self.config = config
    self.logger = loguru.logger
    # self.logger.add()
    self.csv_dir = csv_dir
    ensure_dir_exists(self.csv_dir)
    self.log_file = os.path.join(csv_dir, 'log_file.csv')
    self.handlers = [self.logger.add(open(self.log_file, 'a'), level="INFO")]
def build_or_restore_model(PARAMS):
    ensure_dir_exists(PARAMS['checkpoints_dir'])
    checkpoints = [os.path.join(PARAMS['checkpoints_dir'], name)
                   for name in os.listdir(PARAMS['checkpoints_dir'])]
    if checkpoints:
        latest_checkpoint = max(checkpoints, key=os.path.getctime)
        print(f'found {len(checkpoints)} checkpoints')
        print("Restoring from", latest_checkpoint)
        return tf.keras.models.load_model(latest_checkpoint)
    print("No previously created saved model, creating a new model")
    return build_model(PARAMS)
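# Usage sketch (illustrative, not part of the original source). The newest checkpoint
# is chosen by filesystem creation time (os.path.getctime), so re-copying old
# checkpoints into 'checkpoints_dir' can change which one is restored.
#
#     model = build_or_restore_model(PARAMS)  # PARAMS must carry 'checkpoints_dir'
#                                             # plus whatever build_model() expects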
def restore_or_initialize_experiment(PARAMS, restore_last=False, prefix='log_dir__', verbose=0):
    # date_format = '%Y-%m-%d_%H-%M-%S'  # defined at module level
    PARAMS = copy.deepcopy(PARAMS)
    PARAMS['experiment_name'] = '_'.join([PARAMS['dataset_name'], PARAMS['model_name']])
    PARAMS['experiment_dir'] = os.path.join(PARAMS['neptune_experiment_dir'],
                                            PARAMS['experiment_name'])
    ensure_dir_exists(PARAMS['experiment_dir'])

    if restore_last:
        experiment_files = [(exp_name.split(prefix)[-1], exp_name)
                            for exp_name in os.listdir(PARAMS['experiment_dir'])]
        keep_files = []
        for exp in experiment_files:
            try:
                keep_files.append((datetime.strptime(exp[0], date_format), exp[1]))
                if verbose >= 1:
                    print(f'Found previous experiment {exp[1]}')
            except ValueError:
                if verbose >= 2:
                    print(f'Skipping invalid file {exp[1]}')
        experiment_files = sorted(keep_files, key=lambda exp: exp[0])
        if len(experiment_files) > 0:
            experiment_file = experiment_files[-1]
            PARAMS['experiment_start_time'] = experiment_file[0].strftime(date_format)
            initialize_experiment(PARAMS, experiment_start_time=PARAMS['experiment_start_time'])
            if verbose >= 1:
                print('Continuing experiment with start time =', PARAMS['experiment_start_time'])
            return PARAMS
        print('No previous experiment in', PARAMS['experiment_dir'], 'with prefix', prefix)

    PARAMS['experiment_start_time'] = datetime.now().strftime(date_format)
    initialize_experiment(PARAMS, experiment_start_time=PARAMS['experiment_start_time'])
    if verbose >= 1:
        print('Initializing new experiment at time:', PARAMS['experiment_start_time'])
    return PARAMS
def __init__(self, config):
    self.config = config
    self.logger = loguru.logger
    # self.logger.add()
    self.tracking_dir = config.logger.mlflow_tracking_dir
    self.log_file = config.logger.program_log_file
    ensure_dir_exists(os.path.dirname(self.log_file))
    self.handlers = [self.logger.add(open(self.log_file, 'w'), level="DEBUG")]
    ensure_dir_exists(self.tracking_dir)
    mlflow.set_tracking_uri(self.tracking_dir)
    print(mlflow.tracking.get_tracking_uri())
    mlflow.set_experiment(config.experiment_type)
def __init__(self, dataset_name='Fossil', local_tfrecords=None, **kwargs):
    """Global config file for normalization experiments."""
    self.dataset_name = dataset_name  # was hardcoded to 'Fossil', ignoring the argument
    self.project_directory = '/media/data/jacob/fossil_experiments/'
    self.tfrecords = os.path.join(self.project_directory, 'tf_records')  # Alternative, slow I/O path for tfrecords.
    self.local_tfrecords = local_tfrecords or self.tfrecords  # Primary path for tfrecords.
    self.checkpoints = os.path.join(self.project_directory, 'checkpoints')
    self.summaries = os.path.join(self.project_directory, 'summaries')
    self.experiment_evaluations = os.path.join(self.project_directory, 'experiment_evaluations')
    self.plots = os.path.join(self.project_directory, 'plots')
    self.results = 'results'
    self.log_dir = os.path.join(self.project_directory, 'logs')

    # Create directories if they do not exist.
    check_dirs = [
        self.tfrecords,
        self.local_tfrecords,
        self.checkpoints,
        self.summaries,
        self.experiment_evaluations,
        self.plots,
        self.log_dir,
    ]
    for check_dir in check_dirs:
        ensure_dir_exists(check_dir)
    self.seed = 1085
def init_dirs(self):
    self.model_dir = self.config.model_dir
    ensure_dir_exists(self.model_dir)

    if 'weights_filepath' in self.config:
        assert validate_filepath(self.config['weights_filepath'], file_type='h5')
        self.weights_filepath = self.config['weights_filepath']
    else:
        self.weights_filepath = join(self.model_dir, f'{self.name}-model_weights.h5')

    if 'config_filepath' in self.config:
        assert validate_filepath(self.config['config_filepath'], file_type='json')
        self.config_filepath = self.config['config_filepath']
    else:
        self.config_filepath = join(self.model_dir, f'{self.name}-model_config.json')

    self.config['weights_filepath'] = self.weights_filepath
    self.config['config_filepath'] = self.config_filepath
def check_if_tfrecords_exist(output_dir):
    '''If tfrecords already exist, return a dictionary mapping each subset to its
    sorted file paths. Otherwise return None.'''
    tfrecords = None
    ensure_dir_exists(output_dir)
    subset_dirs = os.listdir(output_dir)
    if len(subset_dirs) > 0:
        tfrecords = {}
        for subset in subset_dirs:
            subset_path = os.path.join(output_dir, subset)
            subset_filenames = os.listdir(subset_path)
            if len(subset_filenames) == 0:
                return None
            tfrecords[subset] = sorted([os.path.join(subset_path, filename)
                                        for filename in subset_filenames])
    return tfrecords
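# Usage sketch (illustrative, not part of the original source). The returned mapping
# is keyed by subset subdirectory (e.g. 'train'/'val'/'test'); any empty subset
# directory makes the whole call return None. The path below is hypothetical.
#
#     tfrecords = check_if_tfrecords_exist('/tmp/tfrecord_dir')
#     if tfrecords is None:
#         print('need to (re)generate shards')
#     else:
#         print({subset: len(paths) for subset, paths in tfrecords.items()})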
def initialize_experiment(cfg, experiment_start_time=None):
    # if 'stage_1' in cfg.pipeline:
    #     for stage in cfg.pipeline:
    #         cfg.experiment.experiment_name = '_'.join([...])  # unfinished per-stage naming
    # else:
    cfg_0 = cfg.stage_0
    cfg.experiment.experiment_name = '_'.join([cfg_0.dataset.dataset_name,
                                               cfg_0.model.model_name])
    cfg.experiment.experiment_dir = os.path.join(cfg.experiment.neptune_experiment_dir,
                                                 cfg.experiment.experiment_name)
    cfg.experiment.experiment_start_time = experiment_start_time or datetime.now().strftime(date_format)
    cfg.update(log_dir=os.path.join(cfg.experiment.experiment_dir,
                                    'log_dir__' + cfg.experiment.experiment_start_time))
    cfg.update(model_dir=os.path.join(cfg.log_dir, 'model_dir'))
    cfg.stage_0.update(tfrecord_dir=os.path.join(cfg.log_dir, 'tfrecord_dir'))
    cfg.update(tfrecord_dir=cfg.stage_0.tfrecord_dir)
    cfg.saved_model_path = str(Path(cfg.model_dir) / Path('saved_model'))
    cfg.checkpoints_path = str(Path(cfg.model_dir) / Path('checkpoints'))

    for k, v in cfg.items():
        if '_dir' in k:
            ensure_dir_exists(v)
def process_and_save_dataset(data_df, name, encoder, validation_splits, experiment_dir,
                             merge_new_labels=True, other_data_keys=[]):
    '''Utility function for processing and saving data provided as a DataFrame.

    other_data_keys : list of str
        Keys of additional columns to save alongside x, y (e.g. 'dataset').
    '''
    data_dir = os.path.join(experiment_dir, name)
    ensure_dir_exists(data_dir)
    data_splits, metadata_splits = preprocess_data(data_df,
                                                   encoder,
                                                   validation_splits=validation_splits,
                                                   output_dir=data_dir,
                                                   threshold=10,
                                                   merge_new_labels=merge_new_labels,
                                                   other_data_keys=other_data_keys)
    metadata_splits.pop('label_map')
    for subset, d in data_splits.items():
        if len(d['path']) == 0:
            continue
        x, y = list(d['path'].flatten()), list(d['label'])
        other_data = {k: d[k] for k in other_data_keys}
        save_paths_w_labels(x, y, encoder, data_dir, subset, other_data=other_data)
    save_metadata(metadata_splits, data_dir)
    save_label_maps(encoder, data_dir)
def initialize_experiment(PARAMS, experiment_start_time=None):
    if PARAMS['transfer_to_PNAS']:
        PARAMS['experiment_name'] = '_'.join([PARAMS['dataset_name'] + '_to_PNAS',
                                              PARAMS['model_name']])
    else:
        PARAMS['experiment_name'] = '_'.join([PARAMS['dataset_name'], PARAMS['model_name']])
    PARAMS['experiment_dir'] = os.path.join(PARAMS['neptune_experiment_dir'],
                                            PARAMS['experiment_name'])
    PARAMS['experiment_start_time'] = experiment_start_time or datetime.now().strftime(date_format)
    PARAMS['log_dir'] = os.path.join(PARAMS['experiment_dir'],
                                     'log_dir__' + PARAMS['experiment_start_time'])
    PARAMS['model_dir'] = os.path.join(PARAMS['log_dir'], 'model_dir')
    PARAMS['tfrecord_dir'] = os.path.join(PARAMS['log_dir'], 'tfrecord_dir')
    PARAMS['saved_model_path'] = str(Path(PARAMS['model_dir']) / Path('saved_model'))
    PARAMS['checkpoints_path'] = str(Path(PARAMS['model_dir']) / Path('checkpoints'))

    for k, v in PARAMS.items():
        if '_dir' in k:
            ensure_dir_exists(v)
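# For reference (comment added, not in the original source), the on-disk layout
# this produces:
#
#   <neptune_experiment_dir>/<dataset_name>_<model_name>/log_dir__<start_time>/
#       model_dir/
#       tfrecord_dir/
#
# Only keys containing '_dir' are created on disk; 'saved_model_path' and
# 'checkpoints_path' are paths only, left for the model-saving code to create.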
def init_local_db(local_db=None, src_db=pyleaves.DATABASE_PATH, force_update=True, verbose=True):
    '''Whenever working on a new machine, run this function to make sure the main
    leavesdb.db file is stored locally, avoiding CIFS permissions issues.

    Usage:
        init_local_db()

    force_update : bool, default True
        If False, only copy from src_db when local_db doesn't exist.
    '''
    if not local_db:
        local_db = os.path.expanduser('~/scripts/leavesdb.db')
    ensure_dir_exists(os.path.dirname(local_db))
    if (not os.path.isfile(local_db)) or force_update:
        if verbose:
            print(f'Copying sql db file from {src_db} to {local_db}')
        shutil.copyfile(src_db, local_db)
    if verbose:
        print(f'Proceeding with sql db at location {local_db}')
    return local_db
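# Usage sketch (illustrative, not part of the original source); the explicit
# destination path below is hypothetical.
#
#     local_db = init_local_db()  # copy to ~/scripts/leavesdb.db
#     local_db = init_local_db(local_db='/tmp/leavesdb.db',
#                              force_update=False)  # keep an existing local copy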
def list_files(self, records_dir):
    '''
    Arguments:
        records_dir : path to a flat directory containing TFRecord shards, usually one
            level below root_dir and corresponding to one specific data split
            (e.g. train, val, or test)
    Return:
        file_list : sorted list of TFRecord files contained in records_dir
    '''
    assert ensure_dir_exists(records_dir)
    file_list = sorted([os.path.join(records_dir, filename)
                        for filename in os.listdir(records_dir)
                        if '.tfrecord' in filename])
    return file_list
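# Usage sketch (illustrative, not part of the original source); `reader` and the
# shard directory are hypothetical. Feeding the sorted shard list straight into
# tf.data is the usual next step:
#
#     shards = reader.list_files('/data/tfrecords/train')
#     train_records = tf.data.TFRecordDataset(shards)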
def init_dirs(self):
    self.model_dir = self.config.model_dir
    ensure_dir_exists(self.model_dir)

    if 'weights_filepath' in self.config:
        assert validate_filepath(self.config['weights_filepath'], file_type='h5')
        self.weights_filepath = self.config['weights_filepath']
    else:
        self.weights_filepath = join(self.model_dir, f'{self.name}-model_weights.h5')

    if 'config_filepath' in self.config:
        assert validate_filepath(self.config['config_filepath'], file_type='json')
        self.config_filepath = self.config['config_filepath']
    else:
        self.config_filepath = join(self.model_dir, f'{self.name}-model_config.json')

    if 'checkpoint_filepath' in self.config:
        # Checkpoints are .h5 files, so validate against 'h5' (was mistakenly 'json').
        assert validate_filepath(self.config['checkpoint_filepath'], file_type='h5')
        self.checkpoint_filepath = self.config['checkpoint_filepath']
    else:
        self.checkpoint_filepath = join(self.model_dir, f'{self.name}-checkpoint.h5')

    self.config['weights_filepath'] = self.weights_filepath
    self.config['config_filepath'] = self.config_filepath
    self.base_model_filepath = os.path.join(self.model_dir, self.name + '-saved_base_model')
def convert_to_png(data_df, output_dir):
    '''
    Load a list of image files, convert to png format if necessary, and save to
    the specified target dir.

    Arguments:
        data_df, pd.DataFrame:
            Rows describing the source images, with at least 'source_path' and
            'label' columns.
        output_dir, str:
            Root directory for converted images, which will be saved in the hierarchy:
                output_dir/
                    |class_1/
                        |image_1
                        |image_2
                        ...
    Return:
        outputs, list: records produced by the conversion functions.
    '''
    labels = set(list(data_df['label']))
    [ensure_dir_exists(join(output_dir, label)) for label in labels]
    tiff_data = data_df[data_df['source_path'].str.endswith('.tif')]
    non_tiff_data = data_df[~(data_df['source_path'].str.endswith('.tif'))]
    outputs = []
    try:
        if non_tiff_data.shape[0] > 0:
            print(f'converting {non_tiff_data.shape[0]} non-tiff images to png')
            outputs.extend(convert_from_nontiff2png(non_tiff_data))
        if tiff_data.shape[0] > 0:
            print(f'converting {tiff_data.shape[0]} tiff images to png')
            outputs.extend(convert_from_tiff2png(tiff_data))
        return outputs
    except Exception as e:
        print("Unexpected error:", sys.exc_info())
        print(f'[ERROR] {e}')
        raise
def __init__(self, data, output_dir,
             columns={'source_path': 'source_path', 'target_path': 'path', 'label': 'family'}):
    '''Class for managing different conversion functions depending on source and
    target image formats.'''
    self.output_ext = 'jpg'
    self.columns = columns
    self.labels = set(list(data[columns['label']]))
    [ensure_dir_exists(join(output_dir, label)) for label in self.labels]
    is_tiff = data[columns['source_path']].str.endswith('.tif')
    self.indices = {
        'tiff': np.where(is_tiff)[0].tolist(),
        'non_tiff': np.where(~is_tiff)[0].tolist()
    }
    self.data = {
        'tiff': data.iloc[self.indices['tiff'], :],
        'non_tiff': data.iloc[self.indices['non_tiff']]
    }
def __init__(self, data, output_dir=None,
             columns={'source_path': 'source_path', 'target_path': 'path'}):
    '''Dask version.

    Class for managing different conversion functions depending on source and
    target image formats.
    '''
    self.output_ext = 'jpg'
    labels = set(list(data['label']))
    if output_dir:
        [ensure_dir_exists(join(output_dir, label)) for label in labels]
    self.indices = len(data['source_path'])
    self.data = data
    self.input_dataset = self.stage_converter(data, columns=columns)
args.gpu_id)  #### SHOULD THIS BE AN INT???
tf.compat.v1.enable_eager_execution()

import pyleaves
from pyleaves.utils import ensure_dir_exists, process_hparam_args
from pyleaves.data_pipeline.preprocessing import LabelEncoder
from pyleaves.leavesdb.tf_utils.tf_utils import reset_eager_session
from pyleaves.utils.csv_utils import gather_run_data, load_csv_data
from pyleaves.train.callbacks import get_callbacks
from pyleaves.config import DatasetConfig, TrainConfig, ExperimentConfig, CSVDomainDataConfig, CSVFrozenRunDataConfig
from pyleaves.train.csv_trainer import CSVTrainer
from pyleaves.analysis.mlflow_utils import mlflow_log_params_dict, mlflow_log_history, mlflow_log_best_history
import mlflow
import mlflow.tensorflow

ensure_dir_exists(args.mlflow_tracking_dir)
mlflow.set_tracking_uri(args.mlflow_tracking_dir)
mlflow.set_experiment(args.experiment)
# print(mlflow.tracking.get_tracking_uri())

#########################################
search_params = ['run_name', 'base_learning_rate', 'batch_size']
if args.model_name == 'all':
    args.model_name = ['resnet_50_v2', 'resnet_152_v2', 'vgg16']
elif isinstance(args.model_name, str):
    search_params.append('model_name')
#########################################
def train_pyleaves_dataset(PARAMS):
    ensure_dir_exists(PARAMS['log_dir'])
    ensure_dir_exists(PARAMS['model_dir'])
    neptune.append_tag(PARAMS['dataset_name'])
    neptune.append_tag(PARAMS['model_name'])
    neptune.append_tag(str(PARAMS['target_size']))
    neptune.append_tag(PARAMS['num_channels'])
    neptune.append_tag(PARAMS['color_mode'])
    K.clear_session()
    tf.random.set_seed(PARAMS['seed'])

    train_dataset, validation_dataset, STAGE1_data_files, excluded = create_dataset(
        dataset_name=PARAMS['dataset_name'],
        threshold=PARAMS['threshold'],
        batch_size=PARAMS['BATCH_SIZE'],
        buffer_size=PARAMS['buffer_size'],
        exclude_classes=PARAMS['exclude_classes'],
        target_size=PARAMS['target_size'],
        num_channels=PARAMS['num_channels'],
        color_mode=PARAMS['color_mode'],
        splits=PARAMS['splits'],
        augmentations=PARAMS['augmentations'],
        seed=PARAMS['seed'],
        use_tfrecords=PARAMS['use_tfrecords'],
        tfrecord_dir=PARAMS['tfrecord_dir'],
        samples_per_shard=PARAMS['samples_per_shard'])

    PARAMS['num_classes'] = STAGE1_data_files.num_classes
    PARAMS['splits_size'] = {'train': {}, 'validation': {}}
    PARAMS['splits_size']['train'] = int(STAGE1_data_files.num_samples * PARAMS['splits']['train'])
    PARAMS['splits_size']['validation'] = int(STAGE1_data_files.num_samples * PARAMS['splits']['validation'])
    PARAMS['steps_per_epoch'] = PARAMS['splits_size']['train'] // PARAMS['BATCH_SIZE']
    PARAMS['validation_steps'] = PARAMS['splits_size']['validation'] // PARAMS['BATCH_SIZE']
    neptune.set_property('num_classes', PARAMS['num_classes'])
    neptune.set_property('steps_per_epoch', PARAMS['steps_per_epoch'])
    neptune.set_property('validation_steps', PARAMS['validation_steps'])

    # TODO: log encoder contents as dict
    encoder = base_dataset.LabelEncoder(STAGE1_data_files.classes)
    PARAMS['base_learning_rate'] = PARAMS['lr']
    PARAMS['input_shape'] = (*PARAMS['target_size'], PARAMS['num_channels'])

    # strategy = tf.distribute.OneDeviceStrategy(device="/gpu:0")
    # with strategy.scope():
    model = build_model(PARAMS)
    # model = build_or_restore_model(PARAMS)
    model.summary(print_fn=lambda x: neptune.log_text('model_summary', x))
    pprint(PARAMS)

    backup_callback = BackupAndRestore(PARAMS['checkpoints_path'])
    backup_callback.set_model(model)
    callbacks = [neptune_logger,
                 backup_callback,
                 EarlyStopping(monitor='val_loss', patience=25, verbose=1, restore_best_weights=True)]
    # ImageLoggerCallback(data=train_dataset, freq=1000, max_images=-1, name='train', encoder=encoder),
    # ImageLoggerCallback(data=validation_dataset, freq=1000, max_images=-1, name='val', encoder=encoder),

    history = model.fit(train_dataset,
                        epochs=PARAMS['num_epochs'],
                        callbacks=callbacks,
                        validation_data=validation_dataset,
                        shuffle=True,
                        steps_per_epoch=PARAMS['steps_per_epoch'],
                        validation_steps=PARAMS['validation_steps'])
    # initial_epoch=0,

    # TODO: Change build_model to build_or_load_model
    model.save(PARAMS['saved_model_path'] + '-stage 1')
    for k, v in PARAMS.items():
        neptune.set_property(str(k), str(v))

    if PARAMS['transfer_to_PNAS'] or PARAMS['transfer_to_Fossil']:
        PARAMS['include_classes'] = STAGE1_data_files.classes
        train_dataset, validation_dataset, STAGE2_data_files, STAGE2_excluded = create_dataset(
            dataset_name=PARAMS['stage_2']['dataset_name'],  # PARAMS['dataset_name'],
            threshold=PARAMS['threshold'],
            batch_size=PARAMS['BATCH_SIZE'],
            buffer_size=PARAMS['buffer_size'],
            exclude_classes=PARAMS['exclude_classes'],
            include_classes=PARAMS['include_classes'],
            target_size=PARAMS['target_size'],
            num_channels=PARAMS['num_channels'],
            color_mode=PARAMS['color_mode'],
            splits=PARAMS['splits'],
            augmentations=PARAMS['augmentations'],
            seed=PARAMS['seed'])

        PARAMS['num_classes'] = STAGE2_data_files.num_classes
        PARAMS['splits_size'] = {'train': {}, 'validation': {}}
        PARAMS['splits_size']['train'] = int(STAGE2_data_files.num_samples * PARAMS['splits']['train'])
        PARAMS['splits_size']['validation'] = int(STAGE2_data_files.num_samples * PARAMS['splits']['validation'])
        PARAMS['steps_per_epoch'] = PARAMS['splits_size']['train'] // PARAMS['BATCH_SIZE']
        PARAMS['validation_steps'] = PARAMS['splits_size']['validation'] // PARAMS['BATCH_SIZE']

        backup_callback = BackupAndRestore(PARAMS['checkpoints_path'])
        backup_callback.set_model(model)
        callbacks = [neptune_logger,
                     backup_callback,
                     EarlyStopping(monitor='val_loss', patience=25, verbose=1, restore_best_weights=True)]

        history = model.fit(train_dataset,
                            epochs=PARAMS['num_epochs'],
                            callbacks=callbacks,
                            validation_data=validation_dataset,
                            shuffle=True,
                            steps_per_epoch=PARAMS['steps_per_epoch'],
                            validation_steps=PARAMS['validation_steps'])
    return history
def train_pnas(PARAMS):
    ensure_dir_exists(PARAMS['log_dir'])
    ensure_dir_exists(PARAMS['model_dir'])
    neptune.append_tag(PARAMS['dataset_name'])
    neptune.append_tag(PARAMS['model_name'])
    neptune.append_tag(str(PARAMS['target_size']))
    neptune.append_tag(PARAMS['num_channels'])
    neptune.append_tag(PARAMS['color_mode'])
    K.clear_session()
    tf.random.set_seed(34)

    train_dataset, validation_dataset, data_files = create_dataset(
        dataset_name=PARAMS['dataset_name'],
        batch_size=PARAMS['BATCH_SIZE'],
        target_size=PARAMS['target_size'],
        num_channels=PARAMS['num_channels'],
        color_mode=PARAMS['color_mode'],
        splits=PARAMS['splits'],
        augment_train=PARAMS['augment_train'],
        aug_prob=PARAMS['aug_prob'])

    PARAMS['num_classes'] = data_files.num_classes
    PARAMS['splits_size'] = {'train': {}, 'validation': {}}
    # Cast to int so the // arithmetic below yields integer step counts for model.fit.
    PARAMS['splits_size']['train'] = int(data_files.num_samples * PARAMS['splits']['train'])
    PARAMS['splits_size']['validation'] = int(data_files.num_samples * PARAMS['splits']['validation'])
    steps_per_epoch = PARAMS['splits_size']['train'] // PARAMS['BATCH_SIZE']
    validation_steps = PARAMS['splits_size']['validation'] // PARAMS['BATCH_SIZE']
    neptune.set_property('num_classes', PARAMS['num_classes'])
    neptune.set_property('steps_per_epoch', steps_per_epoch)
    neptune.set_property('validation_steps', validation_steps)

    encoder = base_dataset.LabelEncoder(data_files.classes)
    # train_dataset = train_dataset.map(lambda x, y: apply_preprocess(x, y, PARAMS['num_classes']), num_parallel_calls=-1)
    # validation_dataset = validation_dataset.map(lambda x, y: apply_preprocess(x, y, PARAMS['num_classes']), num_parallel_calls=-1)
    # METRICS = ['accuracy']
    callbacks = [neptune_logger,
                 ImageLoggerCallback(data=train_dataset, freq=10, max_images=-1, name='train', encoder=encoder),
                 ImageLoggerCallback(data=validation_dataset, freq=10, max_images=-1, name='val', encoder=encoder),
                 EarlyStopping(monitor='val_loss', patience=25, verbose=1)]

    PARAMS['base_learning_rate'] = PARAMS['lr']
    PARAMS['input_shape'] = (*PARAMS['target_size'], PARAMS['num_channels'])
    model = build_model(PARAMS)
    # if PARAMS['optimizer'] == 'Adam':
    #     optimizer = tf.keras.optimizers.Adam(learning_rate=PARAMS['lr'])
    # base = tf.keras.applications.vgg16.VGG16(weights='imagenet',
    #                                          include_top=False,
    #                                          input_tensor=Input(shape=(*PARAMS['target_size'], 3)))
    # model = build_head(base, num_classes=PARAMS['num_classes'])
    # model.compile(optimizer=optimizer, loss=PARAMS['loss'], metrics=METRICS)
    model.summary(print_fn=lambda x: neptune.log_text('model_summary', x))
    pprint(PARAMS)

    history = model.fit(train_dataset,
                        epochs=PARAMS['num_epochs'],
                        callbacks=callbacks,
                        validation_data=validation_dataset,
                        shuffle=True,
                        initial_epoch=0,
                        steps_per_epoch=steps_per_epoch,
                        validation_steps=validation_steps)
    for k, v in PARAMS.items():
        neptune.set_property(str(k), str(v))
    return history
def train_pnas(PARAMS):
    ensure_dir_exists(PARAMS['log_dir'])
    ensure_dir_exists(PARAMS['model_dir'])
    neptune.append_tag(PARAMS['dataset_name'])
    neptune.append_tag(PARAMS['model_name'])
    neptune.append_tag(str(PARAMS['target_size']))
    neptune.append_tag(PARAMS['num_channels'])
    neptune.append_tag(PARAMS['color_mode'])
    K.clear_session()
    tf.random.set_seed(34)

    train_dataset, validation_dataset, data_files = create_dataset(
        dataset_name=PARAMS['dataset_name'],
        threshold=PARAMS['threshold'],
        batch_size=PARAMS['BATCH_SIZE'],
        buffer_size=PARAMS['buffer_size'],
        target_size=PARAMS['target_size'],
        num_channels=PARAMS['num_channels'],
        color_mode=PARAMS['color_mode'],
        splits=PARAMS['splits'],
        augmentations=PARAMS['augmentations'],
        aug_prob=PARAMS['aug_prob'])

    PARAMS['num_classes'] = data_files.num_classes
    PARAMS['splits_size'] = {'train': {}, 'validation': {}}
    PARAMS['splits_size']['train'] = int(data_files.num_samples * PARAMS['splits']['train'])
    PARAMS['splits_size']['validation'] = int(data_files.num_samples * PARAMS['splits']['validation'])
    steps_per_epoch = PARAMS['splits_size']['train'] // PARAMS['BATCH_SIZE']
    validation_steps = PARAMS['splits_size']['validation'] // PARAMS['BATCH_SIZE']
    neptune.set_property('num_classes', PARAMS['num_classes'])
    neptune.set_property('steps_per_epoch', steps_per_epoch)
    neptune.set_property('validation_steps', validation_steps)

    encoder = base_dataset.LabelEncoder(data_files.classes)
    # METRICS = ['accuracy']
    callbacks = [neptune_logger,
                 ImageLoggerCallback(data=train_dataset, freq=1000, max_images=-1, name='train', encoder=encoder),
                 ImageLoggerCallback(data=validation_dataset, freq=1000, max_images=-1, name='val', encoder=encoder),
                 EarlyStopping(monitor='val_loss', patience=25, verbose=1)]

    PARAMS['base_learning_rate'] = PARAMS['lr']
    PARAMS['input_shape'] = (*PARAMS['target_size'], PARAMS['num_channels'])
    model = build_model(PARAMS)
    # if PARAMS['optimizer'] == 'Adam':
    #     optimizer = tf.keras.optimizers.Adam(learning_rate=PARAMS['lr'])
    model.summary(print_fn=lambda x: neptune.log_text('model_summary', x))
    pprint(PARAMS)

    history = model.fit(train_dataset,
                        epochs=PARAMS['num_epochs'],
                        callbacks=callbacks,
                        validation_data=validation_dataset,
                        shuffle=True,
                        initial_epoch=0,
                        steps_per_epoch=steps_per_epoch,
                        validation_steps=validation_steps)
    for k, v in PARAMS.items():
        neptune.set_property(str(k), str(v))
    return history
def subset(self, new_subset):
    self._subset = new_subset
    self.output_dir = self.root_dir  # join(self.root_dir, self._subset)
    ensure_dir_exists(self.output_dir)
search_params = ['dataset_names']
conv_all = False  # initialize so the check below never hits an undefined name
if args.dataset_names == 'all':  # was `=`, a syntax error
    conv_all = True
args = process_hparam_args(args, search_params)

data_by_dataset_dict = {}
for k, v in data_by_dataset:
    if k in args.dataset_names or conv_all:
        data_by_dataset_dict.update({k: v})

data_records = []
new_data_location_info = {}
for dataset_name, rows in data_by_dataset_dict.items():
    output_dir = join(args.target_dir, dataset_name)
    ensure_dir_exists(output_dir)
    if 'source_path' in rows.columns:
        rows['path'] = rows['source_path']
    # Bind the partial to a new name instead of rebinding get_converted_image_name,
    # which would wrap the already-partial'd function again on later iterations.
    convert_name_fn = partial(get_converted_image_name,
                              output_dir=output_dir,
                              output_format=args.target_ext)
    rows.loc[:, 'label'] = rows.loc[:, 'family']
    data_df = rows.apply(convert_name_fn, axis=1)
    data_records.extend(data_df.to_dict('records'))
    num_files = len(rows)
    print(f'[BEGINNING] copying {num_files} from {dataset_name}')
    start_time = time.perf_counter()
    try:
        if args.target_ext == 'jpg':
            coder = DaskCoder(data_df, output_dir)
def init_directories(self, dirs):
    for dir_name, dir_path in dirs.items():
        ensure_dir_exists(dir_path)
def save_config(self, filepath):
    base_dir = os.path.dirname(filepath)
    ensure_dir_exists(base_dir)
    with open(filepath, 'w') as file:
        json.dump(self, file)
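# Usage note (comment added, not in the original source): json.dump(self, ...) only
# succeeds when the config object is itself JSON-serializable (e.g. a dict subclass);
# otherwise pass something like vars(self) instead. The path below is hypothetical.
#
#     config.save_config('/tmp/experiment/config.json')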
from pyleaves.leavesdb.tf_utils.tf_utils import reset_eager_session
from pyleaves.models.resnet import ResNet, ResNetGrayScale
from pyleaves.models.vgg16 import VGG16, VGG16GrayScale
from pyleaves.models.keras_models import build_model
from pyleaves.train.callbacks import get_callbacks
from pyleaves.config import DatasetConfig, TrainConfig, ExperimentConfig
from pyleaves.train.base_trainer import BaseTrainer, BaseTrainer_v1
from pyleaves.analysis.mlflow_utils import mlflow_log_history, mlflow_log_best_history
import mlflow
import mlflow.tensorflow

tracking_dir = r'/media/data/jacob/Fossil_Project/experiments/mlflow'
ensure_dir_exists(tracking_dir)
mlflow.set_tracking_uri(tracking_dir)
print(mlflow.tracking.get_tracking_uri())
mlflow.set_experiment(args.experiment)
# print(mlflow.get_artifact_uri())

# if args.num_channels == 3:
#     color_type = 'rgb'
# else:
#     color_type = 'grayscale'

#########################################
# Spaghetti code for assembling hyperparameter search records to iterate through
#########################################
import itertools
def train_pyleaves_dataset(cfg: DictConfig):
    print(cfg.pretty())
    cfg_0 = cfg.stage_0
    ensure_dir_exists(cfg['log_dir'])
    ensure_dir_exists(cfg['model_dir'])
    neptune.append_tag(cfg_0.dataset.dataset_name)
    neptune.append_tag(cfg_0.model.model_name)
    neptune.append_tag(str(cfg_0.dataset.target_size))
    neptune.append_tag(cfg_0.dataset.num_channels)
    neptune.append_tag(cfg_0.dataset.color_mode)
    K.clear_session()
    tf.random.set_seed(cfg_0.misc.seed)

    train_dataset, validation_dataset, STAGE1_data_files, excluded = create_dataset(
        dataset_name=cfg_0.dataset.dataset_name,
        threshold=cfg_0.dataset.threshold,
        batch_size=cfg_0.training.batch_size,
        buffer_size=cfg_0.training.buffer_size,
        exclude_classes=cfg_0.dataset.exclude_classes,
        target_size=cfg_0.dataset.target_size,
        num_channels=cfg_0.dataset.num_channels,
        color_mode=cfg_0.dataset.color_mode,
        splits=cfg_0.dataset.splits,
        augmentations=cfg_0.training.augmentations,
        seed=cfg_0.misc.seed,
        use_tfrecords=cfg_0.misc.use_tfrecords,
        tfrecord_dir=cfg_0.dataset.tfrecord_dir,
        samples_per_shard=cfg_0.misc.samples_per_shard)

    cfg_0.num_classes = STAGE1_data_files.num_classes
    cfg['splits_size'] = {'train': {}, 'validation': {}}
    cfg['splits_size']['train'] = int(STAGE1_data_files.num_samples * cfg['splits']['train'])
    cfg['splits_size']['validation'] = int(STAGE1_data_files.num_samples * cfg['splits']['validation'])
    cfg['steps_per_epoch'] = cfg['splits_size']['train'] // cfg['BATCH_SIZE']
    cfg['validation_steps'] = cfg['splits_size']['validation'] // cfg['BATCH_SIZE']
    neptune.set_property('num_classes', cfg['num_classes'])
    neptune.set_property('steps_per_epoch', cfg['steps_per_epoch'])
    neptune.set_property('validation_steps', cfg['validation_steps'])

    # TODO: log encoder contents as dict
    encoder = base_dataset.LabelEncoder(STAGE1_data_files.classes)
    cfg['base_learning_rate'] = cfg['lr']
    cfg['input_shape'] = (*cfg['target_size'], cfg['num_channels'])

    # strategy = tf.distribute.OneDeviceStrategy(device="/gpu:0")
    # with strategy.scope():
    model = build_model(cfg)
    # model = build_or_restore_model(cfg)
    model.summary(print_fn=lambda x: neptune.log_text('model_summary', x))
    pprint(cfg)

    backup_callback = BackupAndRestore(cfg['checkpoints_path'])
    backup_callback.set_model(model)
    callbacks = [neptune_logger,
                 backup_callback,
                 EarlyStopping(monitor='val_loss', patience=25, verbose=1, restore_best_weights=True)]
    # ImageLoggerCallback(data=train_dataset, freq=1000, max_images=-1, name='train', encoder=encoder),
    # ImageLoggerCallback(data=validation_dataset, freq=1000, max_images=-1, name='val', encoder=encoder),

    history = model.fit(train_dataset,
                        epochs=cfg['num_epochs'],
                        callbacks=callbacks,
                        validation_data=validation_dataset,
                        shuffle=True,
                        steps_per_epoch=cfg['steps_per_epoch'],
                        validation_steps=cfg['validation_steps'])
    # initial_epoch=0,

    # TODO: Change build_model to build_or_load_model
    model.save(cfg['saved_model_path'] + '-stage 1')
    for k, v in cfg.items():
        neptune.set_property(str(k), str(v))

    if cfg['transfer_to_PNAS'] or cfg['transfer_to_Fossil']:
        cfg['include_classes'] = STAGE1_data_files.classes
        train_dataset, validation_dataset, STAGE2_data_files, STAGE2_excluded = create_dataset(
            dataset_name=cfg['stage_2']['dataset_name'],  # cfg['dataset_name'],
            threshold=cfg['threshold'],
            batch_size=cfg['BATCH_SIZE'],
            buffer_size=cfg['buffer_size'],
            exclude_classes=cfg['exclude_classes'],
            include_classes=cfg['include_classes'],
            target_size=cfg['target_size'],
            num_channels=cfg['num_channels'],
            color_mode=cfg['color_mode'],
            splits=cfg['splits'],
            augmentations=cfg['augmentations'],
            seed=cfg['seed'])

        cfg['num_classes'] = STAGE2_data_files.num_classes
        cfg['splits_size'] = {'train': {}, 'validation': {}}
        cfg['splits_size']['train'] = int(STAGE2_data_files.num_samples * cfg['splits']['train'])
        cfg['splits_size']['validation'] = int(STAGE2_data_files.num_samples * cfg['splits']['validation'])
        cfg['steps_per_epoch'] = cfg['splits_size']['train'] // cfg['BATCH_SIZE']
        cfg['validation_steps'] = cfg['splits_size']['validation'] // cfg['BATCH_SIZE']

        backup_callback = BackupAndRestore(cfg['checkpoints_path'])
        backup_callback.set_model(model)
        callbacks = [neptune_logger,
                     backup_callback,
                     EarlyStopping(monitor='val_loss', patience=25, verbose=1, restore_best_weights=True)]

        history = model.fit(train_dataset,
                            epochs=cfg['num_epochs'],
                            callbacks=callbacks,
                            validation_data=validation_dataset,
                            shuffle=True,
                            steps_per_epoch=cfg['steps_per_epoch'],
                            validation_steps=cfg['validation_steps'])
    return history