def train_fold(base_model_path, save_dir, train_folds, val_folds,
               folds_data, noisy_data):
    train_transfrom = get_transforms(train=True,
                                     size=CROP_SIZE,
                                     wrap_pad_prob=WRAP_PAD_PROB)

    mixer = RandomMixer([
        SigmoidConcatMixer(sigmoid_range=(3, 12)),
        AddMixer(alpha_dist='uniform')
    ], p=[0.6, 0.4])
    mixer = UseMixerWithProb(mixer, prob=MIXER_PROB)

    curated_dataset = FreesoundDataset(folds_data, train_folds,
                                       transform=train_transfrom,
                                       mixer=mixer)
    noisy_dataset = FreesoundNoisyDataset(noisy_data,
                                          transform=train_transfrom,
                                          mixer=mixer)
    train_dataset = RandomDataset([noisy_dataset, curated_dataset],
                                  p=[NOISY_PROB, 1 - NOISY_PROB],
                                  size=DATASET_SIZE)

    val_dataset = FreesoundDataset(folds_data, val_folds,
                                   get_transforms(False, CROP_SIZE))
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE,
                              shuffle=True, drop_last=True,
                              num_workers=NUM_WORKERS)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE * 2,
                            shuffle=False, num_workers=NUM_WORKERS)

    model = load_model(base_model_path, device=DEVICE)
    model.set_lr(BASE_LR)

    callbacks = [
        MonitorCheckpoint(save_dir, monitor='val_lwlrap', max_saves=3),
        CosineAnnealing(T_0=10, T_mult=2, eta_min=0.00001),
        LoggingToFile(save_dir / 'log.txt'),
    ]

    model.fit(train_loader,
              val_loader=val_loader,
              max_epochs=150,
              callbacks=callbacks,
              metrics=['multi_accuracy', 'lwlrap'])
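# The mixer classes above come from the project code and are not shown in this
# section. As a rough illustration of the idea, the sketch below blends two
# spectrogram crops with a sigmoid transition along the time axis, which is
# what a SigmoidConcatMixer-style augmentation is assumed to do; the function
# name and parameters here are hypothetical, not the project's API.
import numpy as np


def sigmoid_concat_sketch(spec_a: np.ndarray, spec_b: np.ndarray,
                          sigmoid_range=(3, 12)) -> np.ndarray:
    """Blend two (n_mels, time) spectrograms with a sigmoid mask over time."""
    scale = np.random.uniform(*sigmoid_range)
    time_steps = spec_a.shape[1]
    # Mask rises smoothly from ~0 to ~1 across the time dimension.
    x = np.linspace(-scale, scale, time_steps, dtype=np.float32)
    mask = 1.0 / (1.0 + np.exp(-x))
    # In training the two crops' label vectors would also be combined
    # (e.g. element-wise max); only the spectrogram blend is shown here.
    return spec_a * (1.0 - mask) + spec_b * mask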
def get_sed_inference_loader(df: pd.DataFrame, datadir: Path, config: dict):
    transforms = get_transforms(config, "train")
    if config["data"].get("denoised_audio_dir") is not None:
        denoised_audio_dir = Path(config["data"]["denoised_audio_dir"])
    else:
        denoised_audio_dir = None  # type: ignore

    if config.get("dataset") is None:
        dataset = datasets.PANNsSedDataset(
            df, datadir, transforms, denoised_audio_dir)
    elif config["dataset"]["name"] == "ChannelsSedDataset":
        melspectrogram_parameters = config["dataset"]["melspectrogram_parameters"]
        pcen_parameters = config["dataset"]["pcen_parameters"]
        period = config["dataset"]["period"]
        dataset = datasets.ChannelsSedDataset(
            df, datadir, transforms, denoised_audio_dir,
            melspectrogram_parameters, pcen_parameters, period)
    elif config["dataset"]["name"] == "NormalizedChannelsSedDataset":
        melspectrogram_parameters = config["dataset"]["melspectrogram_parameters"]
        pcen_parameters = config["dataset"]["pcen_parameters"]
        period = config["dataset"]["period"]
        dataset = datasets.NormalizedChannelsSedDataset(
            df, datadir, transforms, denoised_audio_dir,
            melspectrogram_parameters, pcen_parameters, period)
    else:
        # Without this branch an unknown dataset name would leave `dataset`
        # unbound and raise a confusing NameError below.
        raise NotImplementedError

    loader = data.DataLoader(
        dataset, batch_size=1, shuffle=False, num_workers=8)
    return loader
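# A minimal example of the config shape this builder reads, with keys inferred
# from the lookups above; the concrete values are only illustrative, and
# get_transforms may require additional keys that are not shown here.
example_sed_config = {
    "data": {"denoised_audio_dir": None},
    "dataset": {
        "name": "ChannelsSedDataset",
        "melspectrogram_parameters": {"n_mels": 128, "fmin": 20, "fmax": 16000},
        "pcen_parameters": {"gain": 0.98, "bias": 2, "power": 0.5,
                            "time_constant": 0.4, "eps": 1e-6},
        "period": 30,
    },
}
# loader = get_sed_inference_loader(test_df, Path("input/test_audio"),
#                                   example_sed_config)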
def experiment_pred(experiment_dir, audio_id2spec):
    print(f"Start predict: {experiment_dir}")
    transforms = get_transforms(False, CROP_SIZE)

    pred_lst = []
    for fold in FOLDS:
        print("Predict fold", fold)
        fold_dir = experiment_dir / f'fold_{fold}'
        model_path = get_best_model_path(fold_dir)
        print("Model path", model_path)
        predictor = Predictor(model_path, transforms,
                              BATCH_SIZE, CROP_SIZE, CROP_SIZE, DEVICE)

        pred = fold_pred(predictor, audio_id2spec)
        pred_lst.append(pred)

    audio_id2pred = dict()
    for audio_id in audio_id2spec:
        pred = [p[audio_id] for p in pred_lst]
        audio_id2pred[audio_id] = np.mean(pred, axis=0)

    return audio_id2pred
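# get_best_model_path is a project helper not shown in this section. A minimal
# sketch of the behaviour assumed here: checkpoints are saved with the score
# encoded in the filename (e.g. 'model-072-0.886906.pth', as in the
# MonitorCheckpoint file_format used elsewhere), so the best one can be picked
# by parsing that score. Treat this as an assumption, not the project's code.
import re
from pathlib import Path


def get_best_model_path_sketch(fold_dir: Path):
    """Return the checkpoint whose filename encodes the highest score."""
    pattern = re.compile(r'-(\d+\.\d+)\.pth$')
    scored = []
    for path in fold_dir.glob('*.pth'):
        match = pattern.search(path.name)
        if match:
            scored.append((float(match.group(1)), path))
    return max(scored)[1] if scored else None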
def experiment_pred(experiment_dir, test_data):
    print(f"Start predict: {experiment_dir}")
    transforms = get_transforms(False, CROP_SIZE)

    pred_df_lst = []
    for fold in config.folds:
        print("Predict fold", fold)
        fold_dir = experiment_dir / f'fold_{fold}'
        model_path = get_best_model_path(fold_dir)
        print("Model path", model_path)
        predictor = Predictor(model_path, transforms, BATCH_SIZE,
                              (config.audio.n_mels, CROP_SIZE),
                              (config.audio.n_mels, CROP_SIZE // 4),
                              device=DEVICE)

        pred_df = pred_test(predictor, test_data)
        pred_df_lst.append(pred_df)

    pred_df = gmean_preds_blend(pred_df_lst)
    return pred_df
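# gmean_preds_blend comes from the project; the sketch below shows the usual
# way a geometric-mean blend of per-fold prediction frames is computed,
# assuming each DataFrame holds class probabilities with identical index and
# columns. It is an illustration, not the project's implementation.
import numpy as np
import pandas as pd


def gmean_blend_sketch(pred_df_lst):
    """Geometric mean of aligned probability DataFrames."""
    eps = 1e-9  # guard against log(0)
    stacked = np.stack([df.values for df in pred_df_lst])
    blended = np.exp(np.log(stacked + eps).mean(axis=0))
    return pd.DataFrame(blended,
                        index=pred_df_lst[0].index,
                        columns=pred_df_lst[0].columns)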
def train_fold(save_dir, train_folds, val_folds,
               folds_data, noisy_data, corrected_noisy_data):
    train_transfrom = get_transforms(train=True,
                                     size=CROP_SIZE,
                                     wrap_pad_prob=WRAP_PAD_PROB,
                                     resize_scale=(0.8, 1.0),
                                     resize_ratio=(1.7, 2.3),
                                     resize_prob=0.33,
                                     spec_num_mask=2,
                                     spec_freq_masking=0.15,
                                     spec_time_masking=0.20,
                                     spec_prob=0.5)

    mixer = RandomMixer([
        SigmoidConcatMixer(sigmoid_range=(3, 12)),
        AddMixer(alpha_dist='uniform')
    ], p=[0.6, 0.4])
    mixer = UseMixerWithProb(mixer, prob=MIXER_PROB)

    curated_dataset = FreesoundDataset(folds_data, train_folds,
                                       transform=train_transfrom,
                                       mixer=mixer)
    noisy_dataset = FreesoundNoisyDataset(noisy_data,
                                          transform=train_transfrom,
                                          mixer=mixer)
    corr_noisy_dataset = FreesoundCorrectedNoisyDataset(
        corrected_noisy_data,
        transform=train_transfrom,
        mixer=mixer)
    dataset_probs = [
        NOISY_PROB, CORR_NOISY_PROB,
        1 - NOISY_PROB - CORR_NOISY_PROB
    ]
    print("Dataset probs", dataset_probs)
    print("Dataset lens",
          len(noisy_dataset), len(corr_noisy_dataset), len(curated_dataset))
    train_dataset = RandomDataset(
        [noisy_dataset, corr_noisy_dataset, curated_dataset],
        p=dataset_probs,
        size=DATASET_SIZE)

    val_dataset = FreesoundDataset(folds_data, val_folds,
                                   get_transforms(False, CROP_SIZE))
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE,
                              shuffle=True, drop_last=True,
                              num_workers=NUM_WORKERS)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE * 2,
                            shuffle=False, num_workers=NUM_WORKERS)

    model = FreesoundModel(PARAMS)

    callbacks = [
        MonitorCheckpoint(save_dir, monitor='val_lwlrap', max_saves=1),
        ReduceLROnPlateau(monitor='val_lwlrap', patience=6,
                          factor=0.6, min_lr=1e-8),
        EarlyStopping(monitor='val_lwlrap', patience=18),
        LoggingToFile(save_dir / 'log.txt'),
    ]

    model.fit(train_loader,
              val_loader=val_loader,
              max_epochs=700,
              callbacks=callbacks,
              metrics=['multi_accuracy', 'lwlrap'])
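# RandomDataset is a project class; below is a minimal sketch of the assumed
# behaviour: each __getitem__ first picks one of the source datasets with the
# given probabilities, then draws a random sample from it, so the
# noisy/corrected/curated mix is controlled purely by the probability vector.
# The class name and details are hypothetical.
import random
from torch.utils.data import Dataset


class RandomDatasetSketch(Dataset):
    def __init__(self, datasets, p, size):
        assert len(datasets) == len(p)
        self.datasets = datasets
        self.p = p
        self.size = size  # virtual epoch length

    def __len__(self):
        return self.size

    def __getitem__(self, _):
        dataset = random.choices(self.datasets, weights=self.p, k=1)[0]
        return dataset[random.randrange(len(dataset))]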
from os.path import join

from torch.utils.data import DataLoader

from src.datasets import WhaleDataset, RandomWhaleDataset
from src.argus_models import ArcfaceModel
from src.metrics import CosMAPatK
from src import config

experiment_name = 'arcface_resnet50_016'
experiment_dir = join(config.EXPERIMENTS_DIR, experiment_name)
train_val_csv_path = config.TRAIN_VAL_CSV_PATH
image_size = (96, 304)
num_workers = 8
batch_size = 128
balance_coef = 0.0
train_epoch_size = 50000

if __name__ == "__main__":
    train_transforms = get_transforms(True, image_size)
    train_dataset = RandomWhaleDataset(train_val_csv_path, True,
                                       balance_coef=balance_coef,
                                       size=train_epoch_size,
                                       **train_transforms)
    val_transforms = get_transforms(False, image_size)
    val_dataset = WhaleDataset(train_val_csv_path, False,
                               **val_transforms)
    train_loader = DataLoader(train_dataset, batch_size=batch_size,
                              num_workers=num_workers, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size,
                            num_workers=num_workers, shuffle=False)
def train_fold(save_dir, train_folds, val_folds, folds_data):
    train_transfrom = get_transforms(train=True,
                                     size=CROP_SIZE,
                                     wrap_pad_prob=0.0,
                                     resize_scale=(0.8, 1.0),
                                     resize_ratio=(1.7, 2.3),
                                     resize_prob=0.0,
                                     spec_num_mask=2,
                                     spec_freq_masking=0.15,
                                     spec_time_masking=0.20,
                                     spec_prob=0.0)
    val_transform = get_transforms(train=False, size=CROP_SIZE)

    if MIXER_PROB:
        mixer = get_mixer(mixer_prob=MIXER_PROB,
                          sigmoid_range=(3, 12),
                          alpha_dist='uniform',
                          random_prob=(0.6, 0.4))
    else:
        mixer = None

    train_dataset = BirdsongDataset(folds_data,
                                    folds=train_folds,
                                    transform=train_transfrom,
                                    mixer=mixer)
    val_dataset = BirdsongDataset(folds_data,
                                  folds=val_folds,
                                  transform=val_transform)
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE,
                              shuffle=True, drop_last=True,
                              num_workers=NUM_WORKERS)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE * 2 // ITER_SIZE,
                            shuffle=False, num_workers=NUM_WORKERS)

    model = BirdsongModel(PARAMS)
    if 'pretrained' in model.params['nn_module'][1]:
        model.params['nn_module'][1]['pretrained'] = False

    if USE_AMP:
        initialize_amp(model)

    model.set_device(DEVICES)

    num_iterations = (5 * len(train_dataset)) // BATCH_SIZE
    callbacks = [
        MonitorCheckpoint(save_dir, monitor='val_loss', max_saves=1),
        CosineAnnealingLR(T_max=num_iterations, eta_min=0,
                          step_on_iteration=True),
        EarlyStopping(monitor='val_loss', patience=12),
        LoggingToFile(save_dir / 'log.txt'),
        LoggingToCSV(save_dir / 'log.csv')
    ]

    model.fit(train_loader,
              val_loader=val_loader,
              num_epochs=EPOCHS,
              callbacks=callbacks,
              metrics=['f1_score'])

    del model

    model_path = get_best_model_path(save_dir)
    model = load_model(model_path)
    val_dataset = BirdsongDataset(folds_data,
                                  folds=val_folds + [config.n_folds],
                                  transform=val_transform)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE * 2 // ITER_SIZE,
                            shuffle=False, num_workers=NUM_WORKERS)
    model.set_device(DEVICES[0])
    model.validate(val_loader,
                   metrics=['f1_score'],
                   callbacks=[
                       LoggingToFile(save_dir / 'log.txt'),
                       LoggingToCSV(save_dir / 'log.csv')
                   ])
model_path = config.experiments_dir / 'corr_noisy_007/fold_0/model-072-0.886906.pth'
if not model_path.exists():
    model_path.parent.mkdir(parents=True, exist_ok=True)
    gdown.download(
        'https://drive.google.com/uc?id=1vf6LtRHlpxCC-CjyCguM4JCrso2v3Tip',
        str(model_path), quiet=False)

DEVICE = 'cpu'
CROP_SIZE = 256
BATCH_SIZE = 16
TILE_STEP = 2

PREDICTOR = Predictor(model_path, get_transforms(False, CROP_SIZE),
                      BATCH_SIZE,
                      (config.audio.n_mels, CROP_SIZE),
                      (config.audio.n_mels, CROP_SIZE // TILE_STEP),
                      device=DEVICE)

signal_block_size = config.audio.sampling_rate
SPEC_BLOCK_SIZE = 64
spec_num = 4
SPEC_LST = [
    np.zeros((config.audio.n_mels, SPEC_BLOCK_SIZE), dtype=np.float32)
] * spec_num
PREV_SIGNAL = np.zeros(signal_block_size, dtype=np.float32)


def audio_callback(indata, frames, time, status):
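    # The original callback body is not part of this listing. What follows is
    # a minimal sketch, assuming a sounddevice-style input callback that keeps
    # a rolling buffer of mel-spectrogram blocks in the globals above; the use
    # of librosa and the exact block bookkeeping are assumptions for
    # illustration, not the project's implementation.
    import librosa  # assumed dependency for the sketch
    global PREV_SIGNAL, SPEC_LST
    if status:
        print(status)
    signal = indata[:, 0].astype(np.float32)
    # Prepend the previous block so frames at the boundary keep their context.
    joined_signal = np.concatenate([PREV_SIGNAL, signal])
    PREV_SIGNAL = signal[-signal_block_size:]
    mel = librosa.feature.melspectrogram(y=joined_signal,
                                         sr=config.audio.sampling_rate,
                                         n_mels=config.audio.n_mels)
    mel = librosa.power_to_db(mel).astype(np.float32)
    # Keep only the newest block and rotate the fixed-length spectrogram list.
    SPEC_LST.append(mel[:, -SPEC_BLOCK_SIZE:])
    del SPEC_LST[0]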
pred = np.concatenate(pred_lst)
target = np.concatenate(target_lst)
names = np.concatenate(image_names)

np.savez(
    val_prediction_dir / f'preds-{NAME}.npz',
    altered_pred=pred,
    altered_target=target,
    name=names,
)


if __name__ == "__main__":
    if check_dir_not_exist(PREDICTION_DIR):
        fold_dir = EXPERIMENT_DIR / 'fold_0'
        model_path = get_best_model_path(fold_dir)
        print("Model path", model_path)
        predictor = Predictor(model_path,
                              batch_size=BATCH_SIZE,
                              transform=get_transforms(train=False),
                              device=DEVICE,
                              logits=LOGITS,
                              tta=TTA)

        print("Test predict")
        predict_test(predictor)
        print("Val predict")
        predict_validation(predictor)
args = parser.parse_args()

BATCH_SIZE = 16
CROP_SIZE = 256
DATASET_SIZE = 128 * 256
NOISY_PROB = 0.01
CORR_NOISY_PROB = 0.42
MIXER_PROB = 0.8
WRAP_PAD_PROB = 0.5
CORRECTIONS = True

train_transfrom = get_transforms(train=True,
                                 size=CROP_SIZE,
                                 wrap_pad_prob=WRAP_PAD_PROB,
                                 resize_scale=(0.8, 1.0),
                                 resize_ratio=(1.7, 2.3),
                                 resize_prob=0.33,
                                 spec_num_mask=2,
                                 spec_freq_masking=0.15,
                                 spec_time_masking=0.20,
                                 spec_prob=0.5)
mixer = RandomMixer([
    SigmoidConcatMixer(sigmoid_range=(3, 12)),
    AddMixer(alpha_dist='uniform')
], p=[0.6, 0.4])
mixer = UseMixerWithProb(mixer, prob=MIXER_PROB)

train_csv = pd.read_csv('./train_fold' + str(1) + '.csv')
val_csv = pd.read_csv('./val_fold' + str(1) + '.csv')
def get_loader(df: pd.DataFrame,
               datadir: Path,
               config: dict,
               phase: str,
               event_level_labels=None,
               calltype_labels=None):
    dataset_config = config["dataset"]
    if dataset_config["name"] == "PANNsDataset":
        transforms = get_transforms(config, phase)
        loader_config = config["loader"][phase]
        dataset = datasets.PANNsDataset(
            df, datadir=datadir, transforms=transforms)
    elif dataset_config["name"] == "PANNsMultiLabelDataset":
        transforms = get_transforms(config, phase)
        loader_config = config["loader"][phase]
        period = dataset_config["params"][phase]["period"]
        dataset = datasets.PANNsMultiLabelDataset(
            df, datadir=datadir, transforms=transforms, period=period)
    elif dataset_config["name"] == "MultiChannelDataset":
        waveform_transforms = get_waveform_transforms(config, phase)
        spectrogram_transforms = get_spectrogram_transforms(config, phase)
        melspectrogram_parameters = dataset_config["params"]["melspectrogram_parameters"]
        pcen_parameters = dataset_config["params"]["pcen_parameters"]
        period = dataset_config["params"]["period"][phase]
        loader_config = config["loader"][phase]
        dataset = datasets.MultiChannelDataset(
            df,
            datadir=datadir,
            img_size=dataset_config["img_size"],
            waveform_transforms=waveform_transforms,
            spectrogram_transforms=spectrogram_transforms,
            melspectrogram_parameters=melspectrogram_parameters,
            pcen_parameters=pcen_parameters,
            period=period)
    elif dataset_config["name"] == "LabelCorrectionDataset":
        waveform_transforms = get_waveform_transforms(config, phase)
        spectrogram_transforms = get_spectrogram_transforms(config, phase)
        melspectrogram_parameters = dataset_config["params"]["melspectrogram_parameters"]
        pcen_parameters = dataset_config["params"]["pcen_parameters"]
        period = dataset_config["params"]["period"][phase]
        n_segments = dataset_config["params"]["n_segments"][phase]
        soft_label_dir = Path(dataset_config["params"]["soft_label_dir"])
        threshold = dataset_config["params"].get("threshold", 0.5)
        loader_config = config["loader"][phase]
        dataset = datasets.LabelCorrectionDataset(
            df,
            datadir=datadir,
            soft_label_dir=soft_label_dir,
            img_size=dataset_config["img_size"],
            waveform_transforms=waveform_transforms,
            spectrogram_transforms=spectrogram_transforms,
            melspectrogram_parameters=melspectrogram_parameters,
            pcen_parameters=pcen_parameters,
            period=period,
            n_segments=n_segments,
            threshold=threshold)
    else:
        raise NotImplementedError
    loader = data.DataLoader(dataset, **loader_config)
    return loader
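# An illustrative config fragment matching the keys this factory reads for the
# "MultiChannelDataset" branch. The values are placeholders, not the project's
# settings, and the waveform/spectrogram transform helpers may require
# additional keys that are not shown here.
example_loader_config = {
    "dataset": {
        "name": "MultiChannelDataset",
        "img_size": 224,
        "params": {
            "melspectrogram_parameters": {"n_mels": 128, "fmin": 20, "fmax": 16000},
            "pcen_parameters": {"gain": 0.98, "bias": 2, "power": 0.5,
                                "time_constant": 0.4, "eps": 1e-6},
            "period": {"train": 20, "valid": 30},
        },
    },
    "loader": {
        "train": {"batch_size": 32, "shuffle": True, "num_workers": 8},
        "valid": {"batch_size": 64, "shuffle": False, "num_workers": 8},
    },
}
# loader = get_loader(train_df, Path("input/train_audio"),
#                     example_loader_config, phase="train")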
def train_fold(save_dir, train_folds, val_folds,
               local_rank=0, distributed=False, pretrain_dir=''):
    folds_data = get_folds_data()

    model = AlaskaModel(PARAMS)
    model.params['nn_module'][1]['pretrained'] = False

    if pretrain_dir:
        pretrain_path = get_best_model_path(pretrain_dir)
        if pretrain_path is not None:
            print(f'Pretrain model path {pretrain_path}')
            load_pretrain_weigths(model, pretrain_path)
        else:
            print(f"Pretrain model not found in '{pretrain_dir}'")

    if USE_AMP:
        initialize_amp(model)

    if distributed:
        model.nn_module = SyncBatchNorm.convert_sync_batchnorm(model.nn_module)
        model.nn_module = DistributedDataParallel(
            model.nn_module.to(local_rank),
            device_ids=[local_rank],
            output_device=local_rank)
        if local_rank:
            model.logger.disabled = True
    else:
        model.set_device(DEVICES)

    if USE_EMA:
        initialize_ema(model, decay=0.9999)
        checkpoint = EmaMonitorCheckpoint
    else:
        checkpoint = MonitorCheckpoint

    for epochs, stage in zip(TRAIN_EPOCHS, STAGE):
        test_transform = get_transforms(train=False)
        if stage == 'train':
            mixer = RandomMixer([BitMix(gamma=0.25), EmptyMix()], p=[0., 1.])
            train_transform = get_transforms(train=True)
        else:
            mixer = EmptyMix()
            train_transform = get_transforms(train=False)

        train_dataset = AlaskaDataset(folds_data, train_folds,
                                      transform=train_transform, mixer=mixer)
        val_dataset = AlaskaDataset(folds_data, val_folds,
                                    transform=test_transform)
        val_sampler = AlaskaSampler(val_dataset, train=False)

        if distributed:
            train_sampler = AlaskaDistributedSampler(train_dataset)
        else:
            train_sampler = AlaskaSampler(train_dataset, train=True)

        train_loader = DataLoader(train_dataset, sampler=train_sampler,
                                  num_workers=NUM_WORKERS,
                                  batch_size=BATCH_SIZE)
        val_loader = DataLoader(val_dataset, sampler=val_sampler,
                                num_workers=NUM_WORKERS,
                                batch_size=VAL_BATCH_SIZE)

        callbacks = []
        if local_rank == 0:
            callbacks += [
                checkpoint(save_dir, monitor='val_weighted_auc', max_saves=5,
                           file_format=stage + '-model-{epoch:03d}-{monitor:.6f}.pth'),
                LoggingToFile(save_dir / 'log.txt'),
                LoggingToCSV(save_dir / 'log.csv', append=True)
            ]

        if stage == 'train':
            callbacks += [
                CosineAnnealingLR(T_max=epochs,
                                  eta_min=get_lr(9e-6, WORLD_BATCH_SIZE))
            ]
        elif stage == 'warmup':
            warmup_iterations = epochs * (len(train_sampler) / BATCH_SIZE)
            callbacks += [
                LambdaLR(lambda x: x / warmup_iterations,
                         step_on_iteration=True)
            ]

        if stage == 'train':
            @argus.callbacks.on_epoch_start
            def schedule_mixer_prob(state):
                bitmix_prob = state.epoch / epochs
                mixer.p = [bitmix_prob, 1 - bitmix_prob]
                state.logger.info(f"Mixer probabilities {mixer.p}")
            callbacks += [schedule_mixer_prob]

        if distributed:
            @argus.callbacks.on_epoch_complete
            def schedule_sampler(state):
                train_sampler.set_epoch(state.epoch + 1)
            callbacks += [schedule_sampler]

        metrics = ['weighted_auc', Accuracy('stegano'), Accuracy('quality')]

        model.fit(train_loader,
                  val_loader=val_loader,
                  num_epochs=epochs,
                  callbacks=callbacks,
                  metrics=metrics)
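# get_lr above converts a per-sample base learning rate into one matched to
# the effective world batch size; the exact rule lives elsewhere in the
# project, so the linear scaling below is only an assumption for illustration.
def get_lr_sketch(base_lr_per_sample: float, world_batch_size: int) -> float:
    """Linear LR scaling: lr grows proportionally with the global batch size."""
    return base_lr_per_sample * world_batch_size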
def train_experiment(folds_data, noisy_data, num):
    experiment_dir = SAVE_DIR / f'{num:04}'
    np.random.seed(num)
    random.seed(num)

    random_params = {
        'p_dropout': float(np.random.uniform(0.1, 0.3)),
        'batch_size': int(np.random.choice([128])),
        'lr': float(np.random.choice([0.001, 0.0006, 0.0003])),
        'add_prob': float(np.random.uniform(0.0, 1.0)),
        'noisy_prob': float(np.random.uniform(0.0, 1.0)),
        'lsoft_beta': float(np.random.uniform(0.2, 0.8)),
        'noisy_weight': float(np.random.uniform(0.3, 0.7)),
        'patience': int(np.random.randint(2, 10)),
        'factor': float(np.random.uniform(0.5, 0.8))
    }
    pprint(random_params)

    params = {
        'nn_module': ('SimpleKaggle', {
            'num_classes': len(config.classes),
            'dropout': random_params['p_dropout'],
            'base_size': 64
        }),
        'loss': ('OnlyNoisyLSoftLoss', {
            'beta': random_params['lsoft_beta'],
            'noisy_weight': random_params['noisy_weight'],
            'curated_weight': 1 - random_params['noisy_weight']
        }),
        'optimizer': ('Adam', {'lr': random_params['lr']}),
        'device': 'cuda',
        'amp': {
            'opt_level': 'O2',
            'keep_batchnorm_fp32': True,
            'loss_scale': "dynamic"
        }
    }
    pprint(params)

    try:
        train_transfrom = get_transforms(True, CROP_SIZE)
        curated_dataset = FreesoundDataset(folds_data, TRAIN_FOLDS,
                                           transform=train_transfrom,
                                           add_prob=random_params['add_prob'])
        noisy_dataset = FreesoundNoisyDataset(noisy_data,
                                              transform=train_transfrom)
        train_dataset = CombinedDataset(noisy_dataset, curated_dataset,
                                        noisy_prob=random_params['noisy_prob'],
                                        size=DATASET_SIZE)
        val_dataset = FreesoundDataset(folds_data, VAL_FOLDS,
                                       get_transforms(False, CROP_SIZE))
        train_loader = DataLoader(train_dataset,
                                  batch_size=random_params['batch_size'],
                                  shuffle=True, drop_last=True,
                                  num_workers=NUM_WORKERS)
        val_loader = DataLoader(val_dataset,
                                batch_size=random_params['batch_size'] * 2,
                                shuffle=False, num_workers=NUM_WORKERS)

        model = FreesoundModel(params)

        callbacks = [
            MonitorCheckpoint(experiment_dir, monitor='val_lwlrap', max_saves=1),
            ReduceLROnPlateau(monitor='val_lwlrap',
                              patience=random_params['patience'],
                              factor=random_params['factor'],
                              min_lr=1e-8),
            EarlyStopping(monitor='val_lwlrap', patience=20),
            LoggingToFile(experiment_dir / 'log.txt'),
        ]

        with open(experiment_dir / 'random_params.json', 'w') as outfile:
            json.dump(random_params, outfile)

        model.fit(train_loader,
                  val_loader=val_loader,
                  max_epochs=100,
                  callbacks=callbacks,
                  metrics=['multi_accuracy', 'lwlrap'])
    except KeyboardInterrupt as e:
        raise e
    except BaseException as e:
        print(f"Exception '{e}' with random params '{random_params}'")
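# A possible driver for the random search above: each call draws a fresh set
# of hyper-parameters seeded by its experiment number. The data-loading
# helpers and the number of trials here are assumptions for illustration.
if __name__ == "__main__":
    folds_data = get_folds_data()
    noisy_data = get_noisy_data()
    for num in range(100):
        train_experiment(folds_data, noisy_data, num)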