def train_fold(save_dir, train_folds, val_folds, folds_data):
    train_dataset = StackingDataset(folds_data, train_folds,
                                    get_transforms(True), DATASET_SIZE)
    val_dataset = StackingDataset(folds_data, val_folds,
                                  get_transforms(False))
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE,
                              shuffle=True, drop_last=True,
                              num_workers=NUM_WORKERS)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE * 2,
                            shuffle=False, num_workers=NUM_WORKERS)

    model = StackingModel(PARAMS)

    callbacks = [
        MonitorCheckpoint(save_dir, monitor='val_lwlrap', max_saves=1),
        ReduceLROnPlateau(monitor='val_lwlrap',
                          patience=RS_PARAMS['patience'],
                          factor=RS_PARAMS['factor'],
                          min_lr=1e-8),
        EarlyStopping(monitor='val_lwlrap', patience=30),
        LoggingToFile(save_dir / 'log.txt'),
    ]

    model.fit(train_loader,
              val_loader=val_loader,
              max_epochs=700,
              callbacks=callbacks,
              metrics=['multi_accuracy', 'lwlrap'])

def train_fold(save_dir, train_folds, val_folds, model_path):
    depth_trns = SimpleDepthTransform()
    train_trns = SaltTransform(IMAGE_SIZE, True, 'crop')
    val_trns = SaltTransform(IMAGE_SIZE, False, 'crop')
    train_dataset = SaltDataset(TRAIN_FOLDS_PATH, train_folds, train_trns, depth_trns)
    val_dataset = SaltDataset(TRAIN_FOLDS_PATH, val_folds, val_trns, depth_trns)
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE,
                              shuffle=True, drop_last=True, num_workers=8)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE,
                            shuffle=False, num_workers=8)

    model = load_model(model_path)
    model.loss.lovasz_weight = 0.5
    model.loss.prob_weight = 0.5

    callbacks = [
        MonitorCheckpoint(save_dir, monitor='val_crop_iout', max_saves=3, copy_last=False),
        LoggingToFile(os.path.join(save_dir, 'log.txt')),
        update_lr
    ]

    model.fit(train_loader,
              val_loader=val_loader,
              max_epochs=500,
              callbacks=callbacks,
              metrics=['crop_iout'])

def test_monitor_checkpoint(self, tmpdir, test_engine, optimizer_state):
    path = Path(tmpdir.join("path/to/monitor_checkpoints/"))
    checkpoint = MonitorCheckpoint(dir_path=path, max_saves=3,
                                   monitor='val_loss',
                                   optimizer_state=optimizer_state)
    checkpoint.attach(test_engine)
    checkpoint.start(test_engine.state)

    decreasing_seq = list(range(30))[::-1]
    for i in range(1, len(decreasing_seq), 2):
        decreasing_seq[i] = 100

    for epoch, val_loss in enumerate(decreasing_seq, 1):
        checkpoint_step_epoch(checkpoint, test_engine, epoch, val_loss)
        expected_path = path / f'model-{epoch:03d}-{val_loss:.6f}.pth'
        if val_loss != 100:
            assert check_checkpoint(path, test_engine, epoch, val_loss,
                                    optimizer_state=optimizer_state)
        else:
            assert not expected_path.exists()

    assert len(list(path.glob('*.pth'))) == 3

def train_fold(base_model_path, save_dir, train_folds, val_folds,
               folds_data, noisy_data):
    train_transfrom = get_transforms(train=True,
                                     size=CROP_SIZE,
                                     wrap_pad_prob=WRAP_PAD_PROB)

    mixer = RandomMixer([
        SigmoidConcatMixer(sigmoid_range=(3, 12)),
        AddMixer(alpha_dist='uniform')
    ], p=[0.6, 0.4])
    mixer = UseMixerWithProb(mixer, prob=MIXER_PROB)

    curated_dataset = FreesoundDataset(folds_data, train_folds,
                                       transform=train_transfrom,
                                       mixer=mixer)
    noisy_dataset = FreesoundNoisyDataset(noisy_data,
                                          transform=train_transfrom,
                                          mixer=mixer)
    train_dataset = RandomDataset([noisy_dataset, curated_dataset],
                                  p=[NOISY_PROB, 1 - NOISY_PROB],
                                  size=DATASET_SIZE)

    val_dataset = FreesoundDataset(folds_data, val_folds,
                                   get_transforms(False, CROP_SIZE))
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE,
                              shuffle=True, drop_last=True,
                              num_workers=NUM_WORKERS)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE * 2,
                            shuffle=False, num_workers=NUM_WORKERS)

    model = load_model(base_model_path, device=DEVICE)
    model.set_lr(BASE_LR)

    callbacks = [
        MonitorCheckpoint(save_dir, monitor='val_lwlrap', max_saves=3),
        CosineAnnealing(T_0=10, T_mult=2, eta_min=0.00001),
        LoggingToFile(save_dir / 'log.txt'),
    ]

    model.fit(train_loader,
              val_loader=val_loader,
              max_epochs=150,
              callbacks=callbacks,
              metrics=['multi_accuracy', 'lwlrap'])

def test_checkpoint_exceptions(self, tmpdir, test_engine, recwarn):
    path = Path(tmpdir.join("path/to/exception_checkpoints/"))

    with pytest.raises(ValueError):
        Checkpoint(dir_path=path, max_saves=-3)

    path.mkdir(parents=True)
    Checkpoint(dir_path=path)
    assert len(recwarn) == 1
    warn = recwarn.pop()
    assert f"Directory '{path}' already exists" == str(warn.message)

    with pytest.raises(ValueError):
        MonitorCheckpoint(dir_path=path, monitor='qwerty')

    checkpoint = MonitorCheckpoint(dir_path=path, monitor='train_loss')
    checkpoint.attach(test_engine)
    with pytest.raises(ValueError):
        checkpoint.epoch_complete(test_engine.state)

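# Hedged illustration, inferred from the tests above rather than from the docs:
# MonitorCheckpoint accepts only monitor names prefixed with 'train_' or 'val_',
# so a metric registered as 'accuracy' and computed on the validation loader
# would be monitored like this (the directory name is hypothetical).
example_checkpoint = MonitorCheckpoint(dir_path='checkpoints/',
                                       monitor='val_accuracy', max_saves=1)
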
def train_fold(save_path, train_folds, val_folds):
    train_loader, val_loader = get_data_loaders(BATCH_SIZE, train_folds, val_folds)

    model = ShipMetaModel(params)

    callbacks = [
        MonitorCheckpoint(save_path, monitor='val_iout', max_saves=2, copy_last=True),
        EarlyStopping(monitor='val_iout', patience=40),
        ReduceLROnPlateau(monitor='val_iout', patience=10, factor=0.2, min_lr=1e-8),
        LoggingToFile(os.path.join(save_path, 'log.txt'))
    ]

    model.fit(train_loader,
              val_loader=val_loader,
              max_epochs=EPOCHS,
              callbacks=callbacks,
              metrics=['iout'])

def test_pipeline(tmpdir, get_batch_function, linear_argus_model_instance):
    model = linear_argus_model_instance
    experiment_dir = Path(tmpdir.join("path/to/pipeline_experiment/"))

    train_dataset = TensorDataset(*get_batch_function(batch_size=4096))
    val_dataset = TensorDataset(*get_batch_function(batch_size=512))
    train_loader = DataLoader(train_dataset, shuffle=True, drop_last=True, batch_size=32)
    val_loader = DataLoader(val_dataset, shuffle=False, batch_size=64)

    monitor_checkpoint = MonitorCheckpoint(dir_path=experiment_dir,
                                           monitor='val_loss', max_saves=1)
    callbacks = [
        monitor_checkpoint,
        EarlyStopping(monitor='val_loss', patience=9),
        ReduceLROnPlateau(monitor='val_loss', factor=0.64, patience=3),
        LoggingToFile(experiment_dir / 'log.txt'),
        LoggingToCSV(experiment_dir / 'log.csv')
    ]

    model.fit(train_loader,
              val_loader=val_loader,
              num_epochs=100,
              callbacks=callbacks)

    val_loss = model.validate(val_loader)['val_loss']
    assert val_loss < 0.1

    model_paths = sorted(experiment_dir.glob('*.pth'))
    assert len(model_paths) == 1

    loaded_model = load_model(model_paths[0])
    loaded_val_loss = loaded_model.validate(val_loader)['val_loss']
    assert loaded_val_loss == monitor_checkpoint.best_value

    assert (experiment_dir / 'log.txt').exists()
    assert (experiment_dir / 'log.csv').exists()

def train_fold(save_dir, train_folds, val_folds):
    depth_trns = SimpleDepthTransform()
    train_trns = SaltTransform(IMAGE_SIZE, True, 'crop')
    val_trns = SaltTransform(IMAGE_SIZE, False, 'crop')
    train_dataset = SaltDataset(TRAIN_FOLDS_PATH, train_folds, train_trns, depth_trns)
    val_dataset = SaltDataset(TRAIN_FOLDS_PATH, val_folds, val_trns, depth_trns)
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE,
                              shuffle=True, drop_last=True, num_workers=4)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE,
                            shuffle=False, num_workers=4)

    model = SaltMetaModel(PARAMS)

    callbacks = [
        MonitorCheckpoint(save_dir, monitor='val_crop_iout', max_saves=3, copy_last=False),
        EarlyStopping(monitor='val_crop_iout', patience=100),
        ReduceLROnPlateau(monitor='val_crop_iout', patience=30, factor=0.64, min_lr=1e-8),
        LoggingToFile(os.path.join(save_dir, 'log.txt')),
    ]

    model.fit(train_loader,
              val_loader=val_loader,
              max_epochs=700,
              callbacks=callbacks,
              metrics=['crop_iout'])

                          depth_trns)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE,
                          shuffle=True, drop_last=True, num_workers=4)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE,
                        shuffle=False, num_workers=4)

model = SaltMetaModel(params)

callbacks = [
    MonitorCheckpoint(experiment_dir, monitor='val_crop_iout', max_saves=1, copy_last=False),
    EarlyStopping(monitor='val_crop_iout', patience=100),
    ReduceLROnPlateau(monitor='val_crop_iout', patience=30, factor=0.7, min_lr=1e-8),
    LoggingToFile(os.path.join(experiment_dir, 'log.txt'))
]

with open(os.path.join(experiment_dir, 'random_params.json'), 'w') as outfile:
    json.dump(random_params, outfile)

model.fit(train_loader,
          val_loader=val_loader,

                         train=True)  # define your dataset
train_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    drop_last=True,
    num_workers=4,
)
val_dataset = OcrDataset(CV_CONFIG.get("data_path"), transforms=transforms)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE,
                        shuffle=False, num_workers=4)

model = CRNNModel(MODEL_PARAMS)

callbacks = [
    MonitorCheckpoint(EXPERIMENT_DIR, monitor="val_cer", max_saves=6),
]

# YOU CAN IMPLEMENT DIFFERENT METRICS AND USE THEM TO SEE HOW MANY CORRECT PREDICTIONS YOU HAVE
metrics = [CER()]

model.fit(
    train_loader,
    val_loader=val_loader,
    max_epochs=NUM_EPOCHS,
    metrics=metrics,
    callbacks=callbacks,
    metrics_on_train=True,
)

    batch_size=BATCH_SIZE,
    shuffle=True,
    drop_last=True,
    num_workers=4,
)
val_dataset = OcrDataset(DATASET_PATHS[0], transforms=val_transforms, train=False)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE,
                        shuffle=False, num_workers=4)

model = CRNNModel(MODEL_PARAMS)

callbacks = [
    MonitorCheckpoint(EXPERIMENT_DIR, monitor="val_char_error_rate", max_saves=6),
]

metrics = [CER()]

model.fit(
    train_loader,
    val_loader=val_loader,
    max_epochs=NUM_EPOCHS,
    metrics=metrics,
    callbacks=callbacks,
    metrics_on_train=True,
)

params = {
    'nn_module': {
        'n_classes': 10,
        'p_dropout': args.dropout
    },
    'optimizer': {
        'lr': args.lr
    },
    'device': args.device
}

model = MnistModel(params)

callbacks = [
    MonitorCheckpoint(dir_path='mnist/', monitor='val_accuracy', max_saves=3),
    EarlyStopping(monitor='val_accuracy', patience=9),
    ReduceLROnPlateau(monitor='val_accuracy', factor=0.5, patience=3),
    LoggingToCSV('mnist/log.csv')
]

model.fit(train_loader,
          val_loader=val_loader,
          num_epochs=args.epochs,
          metrics=['accuracy'],
          callbacks=callbacks,
          metrics_on_train=True)

del model
model_path = Path("mnist/").glob("*.pth")
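# Hedged continuation, not part of the original snippet: `glob` returns an
# iterator, so one plausible way to reload the best checkpoint kept by
# MonitorCheckpoint is to sort the saved files by name (the
# 'model-{epoch:03d}-{value:.6f}.pth' pattern seen in the tests above sorts by
# epoch, and only improving epochs are saved) and load the newest one with
# argus.load_model.
model_path = sorted(model_path)[-1]
model = load_model(model_path)
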
)

# IT IS BETTER TO SPLIT DATA INTO TRAIN|VAL AND USE METRICS ON VAL
val_dataset_paths = [p / "val" for p in DATASET_PATHS]
val_dataset = ConcatDataset(
    [OcrDataset(p, transforms=transforms) for p in val_dataset_paths])
# val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

model = CRNNModel(MODEL_PARAMS)

# YOU CAN ADD A CALLBACK IF NEEDED, FIND MORE IN argus.callbacks
callbacks = [
    MonitorCheckpoint(EXPERIMENT_DIR, monitor="val_str_accuracy_letter", max_saves=6),
    EarlyStopping(monitor='val_loss', patience=200),
]

# YOU CAN IMPLEMENT DIFFERENT METRICS AND USE THEM TO SEE HOW MANY CORRECT PREDICTIONS YOU HAVE
metrics = [StringAccuracy(), StringAccuracyLetters()]

model.fit(
    train_loader,
    val_loader=val_loader,
    max_epochs=NUM_EPOCHS,
    metrics=metrics,
    callbacks=callbacks,
    metrics_on_train=True,
)

def train_folds(save_dir, folds_data):
    random_params = {
        'base_size': int(np.random.choice([64, 128, 256, 512])),
        'reduction_scale': int(np.random.choice([2, 4, 8, 16])),
        'p_dropout': float(np.random.uniform(0.0, 0.5)),
        'lr': float(np.random.uniform(0.0001, 0.00001)),
        'patience': int(np.random.randint(3, 12)),
        'factor': float(np.random.uniform(0.5, 0.8)),
        'batch_size': int(np.random.choice([32, 64, 128])),
    }
    pprint(random_params)

    save_dir.mkdir(parents=True, exist_ok=True)
    with open(save_dir / 'random_params.json', 'w') as outfile:
        json.dump(random_params, outfile)

    params = {
        'nn_module': ('FCNet', {
            'in_channels': len(config.classes) * len(EXPERIMENTS),
            'num_classes': len(config.classes),
            'base_size': random_params['base_size'],
            'reduction_scale': random_params['reduction_scale'],
            'p_dropout': random_params['p_dropout']
        }),
        'loss': 'BCEWithLogitsLoss',
        'optimizer': ('Adam', {'lr': random_params['lr']}),
        'device': 'cuda',
    }

    for fold in config.folds:
        val_folds = [fold]
        train_folds = list(set(config.folds) - set(val_folds))
        save_fold_dir = save_dir / f'fold_{fold}'
        print(f"Val folds: {val_folds}, Train folds: {train_folds}")
        print(f"Fold save dir {save_fold_dir}")

        train_dataset = StackingDataset(folds_data, train_folds,
                                        get_transforms(True), DATASET_SIZE)
        val_dataset = StackingDataset(folds_data, val_folds,
                                      get_transforms(False))

        train_loader = DataLoader(train_dataset,
                                  batch_size=random_params['batch_size'],
                                  shuffle=True, drop_last=True,
                                  num_workers=NUM_WORKERS)
        val_loader = DataLoader(val_dataset,
                                batch_size=random_params['batch_size'] * 2,
                                shuffle=False, num_workers=NUM_WORKERS)

        model = StackingModel(params)

        callbacks = [
            MonitorCheckpoint(save_fold_dir, monitor='val_lwlrap', max_saves=1),
            ReduceLROnPlateau(monitor='val_lwlrap',
                              patience=random_params['patience'],
                              factor=random_params['factor'],
                              min_lr=1e-8),
            EarlyStopping(monitor='val_lwlrap', patience=20),
            LoggingToFile(save_fold_dir / 'log.txt'),
        ]

        model.fit(train_loader,
                  val_loader=val_loader,
                  max_epochs=300,
                  callbacks=callbacks,
                  metrics=['multi_accuracy', 'lwlrap'])

                         image_transform=val_trns)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE,
                          num_workers=8, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE,
                        num_workers=8, shuffle=False)

model = IterSizeMetaModel(PARAMS)

callbacks = [
    MonitorCheckpoint(f'/workdir/data/experiments/{EXPERIMENT_NAME}',
                      monitor='val_map_at_k', max_saves=10),
    EarlyStopping(monitor='val_map_at_k', patience=50),
    ReduceLROnPlateau(monitor='val_map_at_k', factor=0.64, patience=1,
                      min_lr=0.000001),
    LoggingToFile(f'/workdir/data/experiments/{EXPERIMENT_NAME}/log.txt')
]

model.fit(train_loader,
          val_loader=val_loader,
          max_epochs=1000,
          callbacks=callbacks,
          metrics=['accuracy', MAPatK(k=3)])

params = {
    'nn_module': {
        'model_name': 'tf_efficientnet_b0_ns',
        'pretrained': True,
        'num_classes': 10,
        'drop_rate': 0.2,
        'drop_path_rate': 0.2,
    },
    'optimizer': ('AdamW', {'lr': args.lr}),
    'loss': 'CrossEntropyLoss',
    'device': args.device
}

model = CifarModel(params)

callbacks = [
    MonitorCheckpoint(dir_path=EXPERIMENT_DIR, monitor='val_accuracy', max_saves=3),
    EarlyStopping(monitor='val_accuracy', patience=9),
    ReduceLROnPlateau(monitor='val_accuracy', factor=0.64, patience=3),
    LoggingToCSV(EXPERIMENT_DIR / 'log.csv')
]

model.fit(train_loader,
          val_loader=val_loader,
          num_epochs=args.epochs,
          metrics=['accuracy'],
          callbacks=callbacks)

def train_fold(save_dir, train_folds, val_folds, folds_data):
    train_transfrom = get_transforms(train=True,
                                     size=CROP_SIZE,
                                     wrap_pad_prob=0.0,
                                     resize_scale=(0.8, 1.0),
                                     resize_ratio=(1.7, 2.3),
                                     resize_prob=0.0,
                                     spec_num_mask=2,
                                     spec_freq_masking=0.15,
                                     spec_time_masking=0.20,
                                     spec_prob=0.0)
    val_transform = get_transforms(train=False, size=CROP_SIZE)

    if MIXER_PROB:
        mixer = get_mixer(mixer_prob=MIXER_PROB,
                          sigmoid_range=(3, 12),
                          alpha_dist='uniform',
                          random_prob=(0.6, 0.4))
    else:
        mixer = None

    train_dataset = BirdsongDataset(folds_data,
                                    folds=train_folds,
                                    transform=train_transfrom,
                                    mixer=mixer)
    val_dataset = BirdsongDataset(folds_data,
                                  folds=val_folds,
                                  transform=val_transform)
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE,
                              shuffle=True, drop_last=True,
                              num_workers=NUM_WORKERS)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE * 2 // ITER_SIZE,
                            shuffle=False, num_workers=NUM_WORKERS)

    model = BirdsongModel(PARAMS)
    if 'pretrained' in model.params['nn_module'][1]:
        model.params['nn_module'][1]['pretrained'] = False

    if USE_AMP:
        initialize_amp(model)

    model.set_device(DEVICES)

    num_iterations = (5 * len(train_dataset)) // BATCH_SIZE
    callbacks = [
        MonitorCheckpoint(save_dir, monitor='val_loss', max_saves=1),
        CosineAnnealingLR(T_max=num_iterations, eta_min=0,
                          step_on_iteration=True),
        EarlyStopping(monitor='val_loss', patience=12),
        LoggingToFile(save_dir / 'log.txt'),
        LoggingToCSV(save_dir / 'log.csv')
    ]

    model.fit(train_loader,
              val_loader=val_loader,
              num_epochs=EPOCHS,
              callbacks=callbacks,
              metrics=['f1_score'])

    del model

    model_path = get_best_model_path(save_dir)
    model = load_model(model_path)
    val_dataset = BirdsongDataset(folds_data,
                                  folds=val_folds + [config.n_folds],
                                  transform=val_transform)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE * 2 // ITER_SIZE,
                            shuffle=False, num_workers=NUM_WORKERS)
    model.set_device(DEVICES[0])
    model.validate(val_loader,
                   metrics=['f1_score'],
                   callbacks=[
                       LoggingToFile(save_dir / 'log.txt'),
                       LoggingToCSV(save_dir / 'log.csv')
                   ])

params = {
    'nn_module': {
        'n_classes': 10,
        'p_dropout': args.dropout
    },
    'optimizer': {
        'lr': args.lr
    },
    'device': args.device
}

model = MnistModel(params)

callbacks = [
    MonitorCheckpoint(dir_path='mnist/', monitor='val_accuracy',
                      max_saves=3, copy_last=True),
    EarlyStopping(monitor='val_accuracy', patience=9),
    ReduceLROnPlateau(monitor='val_accuracy', factor=0.5, patience=3),
    LoggingToCSV('mnist/log.csv')
]

model.fit(train_loader,
          val_loader=val_loader,
          max_epochs=args.epochs,
          metrics=['accuracy'],
          callbacks=callbacks,
          metrics_on_train=True)

del model
model = load_model('mnist/model-last.pth')

    'loss': 'CrossEntropyLoss',
    'device': 'cuda'
}
print("Model params:", params)

model = ArcfaceModel(params)

train_metric_dataset = WhaleDataset(train_val_csv_path, True, **val_transforms)
monitor_metric = CosMAPatK(train_metric_dataset, k=5,
                           batch_size=batch_size, num_workers=num_workers)
monitor_metric_name = 'val_' + monitor_metric.name

callbacks = [
    MonitorCheckpoint(experiment_dir, monitor=monitor_metric_name, max_saves=3),
    EarlyStopping(monitor=monitor_metric_name, patience=50),
    ReduceLROnPlateau(monitor=monitor_metric_name, patience=10, factor=0.64, min_lr=1e-8),
    LoggingToFile(join(experiment_dir, 'log.txt'))
]

with open(join(experiment_dir, 'source.py'), 'w') as outfile:
    outfile.write(open(__file__).read())

model.fit(train_loader,
          val_loader=val_loader,
          max_epochs=1000,

    num_workers=4,
)
val_dataset = OcrDataset(val_files, transforms)
val_loader = DataLoader(
    val_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=4,
)

model = CRNNModel(MODEL_PARAMS)

callbacks = [
    MonitorCheckpoint(EXPERIMENT_DIR, monitor="train_loss", max_saves=6),
    # Checkpoint(EXPERIMENT_DIR),
]

metrics = [
    StringAccuracy(),
]

model.fit(
    train_loader,
    val_loader=val_loader,
    max_epochs=NUM_EPOCHS,
    metrics=metrics,
    callbacks=callbacks,
    metrics_on_train=True,
)

def train_fold(save_dir, train_folds, val_folds,
               folds_data, noisy_data, corrected_noisy_data):
    train_transfrom = get_transforms(train=True,
                                     size=CROP_SIZE,
                                     wrap_pad_prob=WRAP_PAD_PROB,
                                     resize_scale=(0.8, 1.0),
                                     resize_ratio=(1.7, 2.3),
                                     resize_prob=0.33,
                                     spec_num_mask=2,
                                     spec_freq_masking=0.15,
                                     spec_time_masking=0.20,
                                     spec_prob=0.5)

    mixer = RandomMixer([
        SigmoidConcatMixer(sigmoid_range=(3, 12)),
        AddMixer(alpha_dist='uniform')
    ], p=[0.6, 0.4])
    mixer = UseMixerWithProb(mixer, prob=MIXER_PROB)

    curated_dataset = FreesoundDataset(folds_data, train_folds,
                                       transform=train_transfrom,
                                       mixer=mixer)
    noisy_dataset = FreesoundNoisyDataset(noisy_data,
                                          transform=train_transfrom,
                                          mixer=mixer)
    corr_noisy_dataset = FreesoundCorrectedNoisyDataset(
        corrected_noisy_data,
        transform=train_transfrom,
        mixer=mixer)

    dataset_probs = [NOISY_PROB, CORR_NOISY_PROB,
                     1 - NOISY_PROB - CORR_NOISY_PROB]
    print("Dataset probs", dataset_probs)
    print("Dataset lens", len(noisy_dataset),
          len(corr_noisy_dataset), len(curated_dataset))

    train_dataset = RandomDataset(
        [noisy_dataset, corr_noisy_dataset, curated_dataset],
        p=dataset_probs,
        size=DATASET_SIZE)

    val_dataset = FreesoundDataset(folds_data, val_folds,
                                   get_transforms(False, CROP_SIZE))
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE,
                              shuffle=True, drop_last=True,
                              num_workers=NUM_WORKERS)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE * 2,
                            shuffle=False, num_workers=NUM_WORKERS)

    model = FreesoundModel(PARAMS)

    callbacks = [
        MonitorCheckpoint(save_dir, monitor='val_lwlrap', max_saves=1),
        ReduceLROnPlateau(monitor='val_lwlrap', patience=6, factor=0.6, min_lr=1e-8),
        EarlyStopping(monitor='val_lwlrap', patience=18),
        LoggingToFile(save_dir / 'log.txt'),
    ]

    model.fit(train_loader,
              val_loader=val_loader,
              max_epochs=700,
              callbacks=callbacks,
              metrics=['multi_accuracy', 'lwlrap'])

def train_experiment(folds_data, noisy_data, num):
    experiment_dir = SAVE_DIR / f'{num:04}'
    np.random.seed(num)
    random.seed(num)

    random_params = {
        'p_dropout': float(np.random.uniform(0.1, 0.3)),
        'batch_size': int(np.random.choice([128])),
        'lr': float(np.random.choice([0.001, 0.0006, 0.0003])),
        'add_prob': float(np.random.uniform(0.0, 1.0)),
        'noisy_prob': float(np.random.uniform(0.0, 1.0)),
        'lsoft_beta': float(np.random.uniform(0.2, 0.8)),
        'noisy_weight': float(np.random.uniform(0.3, 0.7)),
        'patience': int(np.random.randint(2, 10)),
        'factor': float(np.random.uniform(0.5, 0.8))
    }
    pprint(random_params)

    params = {
        'nn_module': ('SimpleKaggle', {
            'num_classes': len(config.classes),
            'dropout': random_params['p_dropout'],
            'base_size': 64
        }),
        'loss': ('OnlyNoisyLSoftLoss', {
            'beta': random_params['lsoft_beta'],
            'noisy_weight': random_params['noisy_weight'],
            'curated_weight': 1 - random_params['noisy_weight']
        }),
        'optimizer': ('Adam', {'lr': random_params['lr']}),
        'device': 'cuda',
        'amp': {
            'opt_level': 'O2',
            'keep_batchnorm_fp32': True,
            'loss_scale': "dynamic"
        }
    }
    pprint(params)

    try:
        train_transfrom = get_transforms(True, CROP_SIZE)
        curated_dataset = FreesoundDataset(folds_data, TRAIN_FOLDS,
                                           transform=train_transfrom,
                                           add_prob=random_params['add_prob'])
        noisy_dataset = FreesoundNoisyDataset(noisy_data,
                                              transform=train_transfrom)
        train_dataset = CombinedDataset(noisy_dataset, curated_dataset,
                                        noisy_prob=random_params['noisy_prob'],
                                        size=DATASET_SIZE)
        val_dataset = FreesoundDataset(folds_data, VAL_FOLDS,
                                       get_transforms(False, CROP_SIZE))
        train_loader = DataLoader(train_dataset,
                                  batch_size=random_params['batch_size'],
                                  shuffle=True, drop_last=True,
                                  num_workers=NUM_WORKERS)
        val_loader = DataLoader(val_dataset,
                                batch_size=random_params['batch_size'] * 2,
                                shuffle=False, num_workers=NUM_WORKERS)

        model = FreesoundModel(params)

        callbacks = [
            MonitorCheckpoint(experiment_dir, monitor='val_lwlrap', max_saves=1),
            ReduceLROnPlateau(monitor='val_lwlrap',
                              patience=random_params['patience'],
                              factor=random_params['factor'],
                              min_lr=1e-8),
            EarlyStopping(monitor='val_lwlrap', patience=20),
            LoggingToFile(experiment_dir / 'log.txt'),
        ]

        with open(experiment_dir / 'random_params.json', 'w') as outfile:
            json.dump(random_params, outfile)

        model.fit(train_loader,
                  val_loader=val_loader,
                  max_epochs=100,
                  callbacks=callbacks,
                  metrics=['multi_accuracy', 'lwlrap'])
    except KeyboardInterrupt as e:
        raise e
    except BaseException as e:
        print(f"Exception '{e}' with random params '{random_params}'")