def _init_session(C, weights_path=None, loadpath=None, model=None, model_fn=None):
    """Build a ready-to-train `TrainGenerator` from a configs dict.

    Parameters:
        C: dict with 'model', 'datagen', 'val_datagen', 'traingen' sub-dicts,
           each unpacked as kwargs into the corresponding constructor.
        weights_path: optional weights path, forwarded to `model_fn`.
        loadpath: optional saved-state path, forwarded to `TrainGenerator`.
        model: pre-built model; if None, one is created via `model_fn`.
        model_fn: model factory; required when `model` is None.

    Returns:
        A configured `TrainGenerator` wired to train & val `DataGenerator`s.

    Raises:
        ValueError: if neither `model` nor `model_fn` is provided.
    """
    if model is None:
        if model_fn is None:
            # Fail fast with a clear message instead of the opaque
            # "TypeError: 'NoneType' object is not callable".
            raise ValueError("must provide `model_fn` when `model` is None")
        model = model_fn(weights_path=weights_path, **C['model'])
    dg = DataGenerator(**C['datagen'])
    vdg = DataGenerator(**C['val_datagen'])
    tg = TrainGenerator(model, dg, vdg, loadpath=loadpath, **C['traingen'])
    return tg
def _test_misc():
    """Hit assorted DataGenerator error branches (bad set_num, frozen
    attributes, data without labels) expecting graceful failures."""
    cfg = deepcopy(DATAGEN_CFG)
    gen = DataGenerator(**cfg)

    # request a set_num absent from the superbatch
    gen.superbatch = {'1': 1, '2': 2}
    gen.superbatch_set_nums = ['3']
    pass_on_error(gen._get_next_batch, set_num='3', warn=True)

    # request labels for a set_num with no labels cached
    gen.all_labels = {}
    pass_on_error(gen._get_next_labels, set_num='3')

    # loader attributes should reject reassignment
    pass_on_error(setattr, gen, 'load_data', 1)
    pass_on_error(setattr, gen, 'load_labels', 1)

    with tempdir() as dirpath:
        path = os.path.join(dirpath, "arr.npy")
        np.save(path, np.array([1]))
        # data present but labels_path=None -> construction should be handled
        cfg = deepcopy(DATAGEN_CFG)
        cfg['labels_path'] = None
        cfg['data_path'] = path
        pass_on_error(DataGenerator, **cfg)
def _test_lz4f_dataset(C):
    """Exercise the 'numpy-lz4f' loader: construction should fail until
    `data_batch_shape` is supplied, then succeed."""
    C.pop('labels_path')
    C['data_path'] = os.path.join(datadir, 'image_lz4f', 'train',
                                  '128batch__1.npy')
    # wrong/implicit loader for lz4f data -> expected failure
    pass_on_error(DataGenerator, **C)

    C['data_loader'] = 'numpy-lz4f'
    # lz4f loader without a declared batch shape -> expected failure
    pass_on_error(DataGenerator, **C)

    C['data_batch_shape'] = (128, 28, 28, 1)
    DataGenerator(**C)  # now fully specified; must construct cleanly
def test_advance_batch():
    """advance_batch() should work with a valid superbatch, and fail
    gracefully on a bad batch_size, an empty queue, or an unknown loader."""
    cfg = deepcopy(DATAGEN_CFG)
    cfg['superbatch_path'] = os.path.join(datadir, 'image', 'train')
    gen = DataGenerator(**cfg)
    gen.advance_batch()  # happy path

    cfg['batch_size'] = 31  # does not divide the stored batches evenly
    gen = DataGenerator(**cfg)
    pass_on_error(gen.advance_batch)

    cfg['batch_size'] = 256
    gen = DataGenerator(**cfg)
    gen.set_nums_to_process = []  # nothing left to process
    pass_on_error(gen.advance_batch)

    cfg['data_loader'] = 'pigeon'  # not a recognized loader name
    pass_on_error(DataGenerator, **cfg)
def _test_infer_and_set_info():
    """Loader inference: DataLoader instances and the DataLoader class are
    accepted; arbitrary classes (e.g. DataGenerator) are rejected."""
    cfg = deepcopy(DATAGEN_CFG)
    with tempdir() as dirpath:
        path = os.path.join(dirpath, "arr.npy")
        np.save(path, np.array([1]))
        cfg['labels_path'] = None

        # a concrete DataLoader instance is valid for data...
        cfg['data_loader'] = DataLoader(path, loader='numpy')
        DataGenerator(**cfg)
        # ...and for labels
        cfg['labels_loader'] = DataLoader(path, loader='numpy')
        DataGenerator(**cfg)

        # an unrelated class as data_loader -> expected failure
        cfg['data_loader'] = DataGenerator
        pass_on_error(DataGenerator, **cfg)

        # the DataLoader class itself (uninstantiated) is accepted
        cfg['labels_loader'] = None
        cfg['data_loader'] = DataLoader
        DataGenerator(**cfg)

        # an unrelated class as labels_loader -> expected failure
        cfg['labels_loader'] = DataGenerator
        pass_on_error(DataGenerator, **cfg)
def test_shuffle():
    """Group-batch shuffling must survive superbatch preload + advance."""
    cfg = deepcopy(DATAGEN_CFG)
    cfg['shuffle_group_batches'] = True
    cfg['superbatch_path'] = os.path.join(datadir, 'image', 'train')
    cfg['batch_size'] = 64
    gen = DataGenerator(**cfg)
    gen.preload_superbatch()
    gen.advance_batch()
def init_session(CONFIGS, model_fn):
    """Assemble a `TrainGenerator` from a configs dict.

    Parameters:
        CONFIGS: dict with 'model', 'datagen', 'val_datagen', 'traingen'
                 sub-dicts, each unpacked into its constructor.
        model_fn: callable building the model from `CONFIGS['model']` kwargs.

    Returns:
        The configured `TrainGenerator`.
    """
    net = model_fn(**CONFIGS['model'])
    train_gen = DataGenerator(**CONFIGS['datagen'])
    val_gen = DataGenerator(**CONFIGS['val_datagen'])
    return TrainGenerator(net, train_gen, val_gen, **CONFIGS['traingen'])
def _test_hdf5(C):
    """Explicit 'hdf5' loader: construct and advance one batch."""
    C['data_loader'] = 'hdf5'
    generator = DataGenerator(**C)
    generator.advance_batch()
def _test_auto_hdf5(C):
    """Loader left unset: DataGenerator should infer it and advance a batch."""
    generator = DataGenerator(**C)
    generator.advance_batch()
def test_kwargs():
    """Both group-shuffle flags enabled together must construct cleanly."""
    cfg = deepcopy(DATAGEN_CFG)
    cfg.update(shuffle_group_batches=True, shuffle_group_samples=True)
    DataGenerator(**cfg)
def _test_make_group_batch_and_labels():
    """_make_group_batch_and_labels: mismatched lengths fail; sample and
    batch shuffling variants succeed, with and without labels_path."""
    gen = DataGenerator(**deepcopy(DATAGEN_CFG))

    gen.batch = np.random.randn(128, 10)
    gen.labels = np.random.randn(129, 10)  # length mismatch vs. batch
    pass_on_error(gen._make_group_batch_and_labels, n_batches=2)

    gen.shuffle_group_samples = True
    gen.labels = gen.batch.copy()  # lengths now agree
    gen._make_group_batch_and_labels(n_batches=2)

    gen.labels_path = None  # no labels on disk
    gen._make_group_batch_and_labels(n_batches=2)

    gen.shuffle_group_batches = True
    gen.shuffle_group_samples = False
    gen._make_group_batch_and_labels(n_batches=2)
def _test_uninstantiated(C):
    """An uninstantiated preprocessor class + its configs must be accepted."""
    C['preprocessor'] = TimeseriesPreprocessor
    C['preprocessor_configs'] = {'window_size': 5}
    DataGenerator(**C)
def _test_no_loader():
    """No labels loader and no labels path: construction must still succeed."""
    cfg = deepcopy(DATAGEN_CFG)
    cfg.update(labels_loader=None, labels_path=None)
    DataGenerator(**cfg)
VAL_DATAGEN_CFG = dict(
    data_path=os.path.join(datadir, 'val'),
    labels_path=os.path.join(datadir, 'val', 'labels.h5'),
    batch_size=batch_size,
    shuffle=False,
    superbatch_set_nums='all',
)
# TrainGenerator configuration:
#   epochs          -- number of epochs to train for
#   logs_dir        -- where TrainGenerator state, model, report & history go
#   best_models_dir -- where the model is saved on a new best validation score
#   model_configs   -- model configurations dict to save & write to report
TRAINGEN_CFG = dict(
    epochs=3,
    logs_dir=os.path.join('dir', 'logs'),
    best_models_dir=os.path.join('dir', 'models'),
    model_configs=MODEL_CFG,
)

#%%# Create training objects ################################################
model = make_model(**MODEL_CFG)
datagen = DataGenerator(**DATAGEN_CFG)
val_datagen = DataGenerator(**VAL_DATAGEN_CFG)
traingen = TrainGenerator(model, datagen, val_datagen, **TRAINGEN_CFG)

# shorten the run and checkpoint every 2nd epoch
traingen.epochs = 1
traingen.unique_checkpoint_freq = {'epoch': 2}
traingen.temp_checkpoint_freq = {'epoch': 2}

#%%# Train ##################################################################
traingen.train()
eval_fn='predict', val_freq={'epoch': 2}, plot_history_freq={'epoch': 2}, unique_checkpoint_freq={'epoch': 2}, model_save_kw=dict(include_optimizer=False, save_format='h5'), model_name_configs=dict(input_dropout='idp', preout_dropout='pdp', optimizer='', lr='', best_key_metric=None)) #%%# Create visualization callback ########################################## TRAINGEN_CFG['callbacks'] = [VizAE2D(n_images=8, save_images=True)] #%%# Create training objects ################################################ model = make_model(**MODEL_CFG) dg = DataGenerator(**DATAGEN_CFG) vdg = DataGenerator(**VAL_DATAGEN_CFG) tg = TrainGenerator(model, dg, vdg, **TRAINGEN_CFG) # save optimizer weights & attrs to load later tg.saveskip_list.pop(tg.saveskip_list.index('optimizer_state')) #%%# Train ################################################################## tg.train() #%%# Phase 2 ########## # switch to Mean Absolute Error loss; greater penalty to smaller errors # forces better image resolution. # Internally, TrainGenerator will append 'mae' loss to same list as was 'mse'. tg.model.compile('nadam', 'mae') tg.epochs = 12 tg.train()