# Resolve the experiment directory from the config file name, e.g.
# "configs/foo.yml" -> "out/foo".
config_name = args.config.split("/")[-1].replace(".yml", "")
expdir = Path(f"out/{config_name}")

# This script only consumes an existing experiment directory; it refuses to
# create one and exits with a non-zero status instead.
if not expdir.exists():
    print(f"You need to train {config_name} first!")
    sys.exit(1)

submission_file_dir = expdir / "submission"
submission_file_dir.mkdir(parents=True, exist_ok=True)

logger = utils.get_logger(expdir / "tta.log")

# environment
utils.set_seed(global_params["seed"])
device = training.get_device(global_params["device"])

# data
tp, fp, train_all, test_all, train_audio, test_audio = datasets.get_metadata(
    config)
submission = pd.read_csv(config["data"]["sample_submission_path"])

# labels
labels = []
duration = config["dataset"]["valid"]["params"]["duration"]
for _, sample in tp.iterrows():
    t_min = sample["t_min"]
    t_max = sample["t_max"]
    flac_id = sample["recording_id"]

    # Place a `duration`-long window so that the annotated span
    # [t_min, t_max] sits at its centre ...
    call_duration = t_max - t_min
    relative_offset = (duration - call_duration) / 2

    # ... then clamp the window start into [0, 60 - duration].
    # NOTE(review): 60 is presumably the recording length in seconds --
    # confirm against the dataset.
    offset = min(max(0, t_min - relative_offset), 60 - duration)
    tail = offset + duration
global_params = config["globals"]

# logging
# The output directory is named after the config file, e.g.
# "configs/foo.yml" -> "out/foo"; it is created here if missing.
config_name = args.config.split("/")[-1].replace(".yml", "")
logdir = Path(f"out/{config_name}")
logdir.mkdir(exist_ok=True, parents=True)

logger = utils.get_logger(logdir / "output.log")

# environment
utils.set_seed(global_params["seed"])
device = training.get_device(global_params["device"])

# data
# Only the first, second, third and fifth items of the metadata tuple are
# kept here; the fourth and sixth are discarded.
tp, fp, train_all, _, train_audio, _ = datasets.get_metadata(config)

# validation
splitter = training.get_split(config)

##################################################
# Main Loop #
##################################################
for i, (trn_idx, val_idx) in enumerate(splitter.split(train_all)):
    # Only the folds listed in the config are processed.
    if i not in global_params["folds"]:
        continue

    logger.info("=" * 20)
    logger.info(f"Fold {i}")
    logger.info("=" * 20)

    # Select this fold's rows by index label and renumber them from 0.
    trn_df = train_all.loc[trn_idx, :].reset_index(drop=True)
    val_df = train_all.loc[val_idx, :].reset_index(drop=True)
# The output directory is named after the config file, e.g.
# "configs/foo.yml" -> "out/foo"; it is created here if missing.
config_name = args.config.split("/")[-1].replace(".yml", "")
expdir = Path(f"out/{config_name}")
expdir.mkdir(exist_ok=True, parents=True)

logger = utils.get_logger(expdir / "ensemble.log")

# Load the per-model result files listed under config["results"]; the three
# lists stay aligned by position.
oofs = []
submissions = []
names = []
for result_dict in config["results"]:
    oofs.append(pd.read_csv(result_dict["oof"]))
    submissions.append(pd.read_csv(result_dict["submission"]))
    names.append(result_dict["name"])

tp, _, _, _, _, _ = datasets.get_metadata(config)

# Left-join every OOF frame onto tp's "index" column so that each one has
# exactly tp's rows, in tp's order (rows missing from an OOF file become NaN).
indices = tp[["index"]]
oofs = [indices.merge(oof, on="index", how="left") for oof in oofs]

labels = []
for _, sample in tp.iterrows():
    t_min = sample["t_min"]
    t_max = sample["t_max"]
    flac_id = sample["recording_id"]

    # Place a 10-unit window so that the annotated span [t_min, t_max] sits
    # at its centre, then clamp the window start into [0, 60 - 10].
    # NOTE(review): 10 and 60 are hard-coded here -- presumably the crop
    # duration and recording length in seconds; confirm they match the
    # values the combined models were run with.
    call_duration = t_max - t_min
    relative_offset = (10 - call_duration) / 2
    offset = min(max(0, t_min - relative_offset), 60 - 10)
    tail = offset + 10