def _validate():
    """Validate predictions and fit the threshold estimator.

    Builds threshold-estimator training data from the validation
    predictions plus two cached prediction files, evaluates with
    thresholds predicted out-of-fold, then refits the estimator on all
    samples, saves it to ``MODELS_DIR / 'ath_estimator.pkl'`` and logs a
    second evaluation using thresholds predicted on the same samples the
    estimator was fitted on.
    """
    log = tk.log.get(__name__)
    X, y = _data.load_train_data()
    # TODO: temporary! use_cache=True
    pred = predict_all('val', X, use_cache=True)

    # Threshold tuning: assemble the estimator's inputs/targets.
    val_cache_dir = CACHE_DIR / 'val'
    pred_bin = joblib.load(val_cache_dir / 'bin_nas.pkl')
    pred_reg = joblib.load(val_cache_dir / 'reg_nas.pkl')
    threshold_X, threshold_y = _ath.create_data(y, pred, pred_bin, pred_reg)

    seed_file = MODELS_DIR / 'split_seed.txt'
    split_seed = int(seed_file.read_text())

    # Each fold's estimator only predicts thresholds for its held-out part.
    oof_thresholds = np.empty((len(y), ))
    for fold in range(CV_COUNT):
        train_idx, val_idx = tk.ml.cv_indices(
            X, y,
            cv_count=CV_COUNT,
            cv_index=fold,
            split_seed=split_seed,
            stratify=False)
        fold_estimator = _ath.create_estimator(
            threshold_X[train_idx], threshold_y[train_idx])
        oof_thresholds[val_idx] = fold_estimator.predict(threshold_X[val_idx])

    # Evaluate with the thresholds decided by cross-validation.
    _evaluation.log_evaluation(
        y, pred, print_fn=log.info, threshold=oof_thresholds)

    # Refit on the whole data set and save.
    final_estimator = _ath.create_estimator(threshold_X, threshold_y)
    joblib.dump(final_estimator, MODELS_DIR / 'ath_estimator.pkl')

    # Finally evaluate with the "cheating" (in-sample) thresholds.
    in_sample_thresholds = final_estimator.predict(threshold_X)
    _evaluation.log_evaluation(
        y, pred, print_fn=log.info, threshold=in_sample_thresholds)
def _validate():
    """Validate predictions and persist the searched threshold.

    Predicts on the training data via ``predict_all('val', X)``, logs the
    evaluation with ``search_th=True`` and writes the value returned by
    ``_evaluation.log_evaluation`` to ``MODELS_DIR / 'threshold.txt'``.
    """
    logger = tk.log.get(__name__)
    X, y = _data.load_train_data()
    pred = predict_all('val', X)
    threshold = _evaluation.log_evaluation(
        y, pred, print_fn=logger.info, search_th=True)
    # Make sure the output directory exists so write_text cannot fail with
    # FileNotFoundError after the (expensive) prediction step has finished.
    MODELS_DIR.mkdir(parents=True, exist_ok=True)
    (MODELS_DIR / 'threshold.txt').write_text(str(threshold))
def _train(args):
    """Train the model for one cross-validation fold and save it.

    The split seed is derived from ``MODEL_NAME`` and written to
    ``MODELS_DIR / 'split_seed.txt'``. The inputs are replaced by the
    output of ``_mf.get_meta_features('val', X)`` before splitting.

    Args:
        args: Parsed arguments; ``args.cv_index`` selects the fold.
    """
    log = tk.log.get(__name__)
    log.info(f'args: {args}')

    # Seed derived from the model name's UTF-8 bytes.
    name_hex = MODEL_NAME.encode('utf-8').hex()
    split_seed = int(name_hex, 16) % 10000000
    MODELS_DIR.mkdir(parents=True, exist_ok=True)
    (MODELS_DIR / 'split_seed.txt').write_text(str(split_seed))

    X, y = _data.load_train_data()
    X = _mf.get_meta_features('val', X)
    ti, vi = tk.ml.cv_indices(
        X, y,
        cv_count=CV_COUNT,
        cv_index=args.cv_index,
        split_seed=split_seed,
        stratify=False)
    X_train, y_train = X[ti], y[ti]
    X_val, y_val = X[vi], y[vi]
    log.info(f'cv_index={args.cv_index}: train={len(y_train)} val={len(y_val)}')

    network, _ = _create_network(input_dims=X.shape[-1])

    # Only the left-right flip is active; the Padding / RandomRotate /
    # RandomCrop / Resize augmentors are disabled for this model.
    gen = tk.generator.Generator()
    gen.add(tk.image.RandomFlipLR(probability=0.5, with_output=True))

    model = tk.dl.models.Model(network, gen, batch_size=BATCH_SIZE)
    model.compile(
        sgd_lr=0.01 / 128,
        loss=tk.dl.losses.lovasz_hinge_elup1,
        metrics=[tk.dl.metrics.binary_accuracy],
        clipnorm=10.0)
    model.fit(
        X_train, y_train,
        validation_data=(X_val, y_val),
        epochs=EPOCHS,
        reduce_lr_epoch_rates=(0.5, 0.75, 0.875),
        mixup=False,
        lr_warmup=False)
    model.save(MODELS_DIR / f'model.fold{args.cv_index}.h5', include_optimizer=False)

    # Evaluation on the held-out fold is only done where is_master() is true.
    if tk.dl.hvd.is_master():
        _evaluation.log_evaluation(y_val, model.predict(X_val))
def _validate():
    """Validate predictions against per-sample mean targets.

    Each sample's target is reduced to its mean over axes 1-3 and the
    predictions from ``predict_all('val', X)`` are reported with
    ``tk.ml.print_regression_metrics``.
    """
    log = tk.log.get(__name__)
    X, targets = _data.load_train_data()
    # One scalar per sample: the mean over axes 1, 2 and 3.
    y = np.mean(targets, axis=(1, 2, 3))
    pred = predict_all('val', X)
    tk.ml.print_regression_metrics(y, pred, print_fn=log.info)
def _validate():
    """Validate predictions against per-sample binary targets.

    A sample's label is 1 when any of its target values exceeds 0.5 and 0
    otherwise; predictions from ``predict_all('val', X)`` are reported with
    ``tk.ml.print_classification_metrics``.
    """
    log = tk.log.get(__name__)
    X, targets = _data.load_train_data()
    # Reduce over axes 1-3: does any element exceed 0.5?  -> 0 or 1
    has_positive = np.max(targets > 0.5, axis=(1, 2, 3))
    y = has_positive.astype(np.uint8)
    pred = predict_all('val', X)
    tk.ml.print_classification_metrics(y, pred, print_fn=log.info)
def validate():
    """Compute out-of-fold predictions with the saved model and store them.

    Uses the same ``nfold`` / ``split_seed`` split arguments as ``train`` and
    hands the predictions to ``_data.save_oofp``.
    """
    dataset = _data.load_train_data()
    cv_folds = tk.validation.split(
        dataset, nfold, stratify=True, split_seed=split_seed)
    trained_model = create_model().load(models_dir)
    oof_pred = trained_model.predict_oof(dataset, cv_folds)
    _data.save_oofp(models_dir, dataset, oof_pred)
def train():
    """Run cross-validated training and post the evaluation results.

    The result of ``model.cv`` is passed to ``tk.notifications.post_evals``.
    """
    dataset = _data.load_train_data()
    cv_folds = tk.validation.split(
        dataset, nfold, stratify=True, split_seed=split_seed)
    new_model = create_model()
    cv_evals = new_model.cv(dataset, cv_folds)
    tk.notifications.post_evals(cv_evals)
def _validate():
    """Validate on the fold-0 hold-out and persist the searched threshold.

    The split is rebuilt from the seed stored in
    ``MODELS_DIR / 'split_seed.txt'``; only the validation indices of
    ``cv_index=0`` are predicted and evaluated. The value returned by
    ``_evaluation.log_evaluation(..., search_th=True)`` is written to
    ``MODELS_DIR / 'threshold.txt'``.
    """
    log = tk.log.get(__name__)
    X, y = _data.load_train_data()

    seed_file = MODELS_DIR / 'split_seed.txt'
    split_seed = int(seed_file.read_text())
    # Only the validation part of the first fold is used here.
    _, val_idx = tk.ml.cv_indices(
        X, y,
        cv_count=CV_COUNT,
        cv_index=0,
        split_seed=split_seed,
        stratify=False)
    X_val = X[val_idx]
    y_val = y[val_idx]

    pred_val = predict_all('val', X_val)
    best_threshold = _evaluation.log_evaluation(
        y_val, pred_val, print_fn=log.info, search_th=True)
    (MODELS_DIR / 'threshold.txt').write_text(str(best_threshold))
def _validate():
    """Validate predictions on the training data and save the threshold.

    The value returned by ``_evaluation.log_evaluation(..., search_th=True)``
    is written to ``MODELS_DIR / 'threshold.txt'``; the directory is created
    first if it does not exist.
    """
    log = tk.log.get(__name__)
    X_train, y_train = _data.load_train_data()
    pred = predict_all('val', X_train)
    best_threshold = _evaluation.log_evaluation(
        y_train, pred, print_fn=log.info, search_th=True)

    MODELS_DIR.mkdir(parents=True, exist_ok=True)
    threshold_file = MODELS_DIR / 'threshold.txt'
    threshold_file.write_text(str(best_threshold))
def _train(args, fine=False):
    """Train (or fine-tune) the model for one cross-validation fold.

    The split seed is derived from ``MODEL_NAME`` and written to
    ``MODELS_DIR / 'split_seed.txt'``. The fold's model is saved to
    ``MODELS_DIR / 'model.fold{cv_index}.h5'``.

    Args:
        args: Parsed arguments; ``args.cv_index`` selects the fold.
        fine: When true, the existing fold weights are loaded, training data
            is extended with ``None`` placeholders that the generator fills
            from pseudo-labelled test samples, and a smaller learning rate
            and a third of the epochs are used.
    """
    log = tk.log.get(__name__)
    log.info(f'args: {args}')

    # Seed derived from the model name's UTF-8 bytes.
    name_hex = MODEL_NAME.encode('utf-8').hex()
    split_seed = int(name_hex, 16) % 10000000
    MODELS_DIR.mkdir(parents=True, exist_ok=True)
    (MODELS_DIR / 'split_seed.txt').write_text(str(split_seed))

    X, y = _data.load_train_data()
    ti, vi = tk.ml.cv_indices(
        X, y,
        cv_count=CV_COUNT,
        cv_index=args.cv_index,
        split_seed=split_seed,
        stratify=False)
    X_train, y_train = X[ti], y[ti]
    X_val, y_val = X[vi], y[vi]
    log.info(f'cv_index={args.cv_index}: train={len(y_train)} val={len(y_val)}')

    network, lr_multipliers = _create_network()

    gen = tk.generator.Generator()
    if fine:
        # Append None placeholders (half the training size); the
        # RandomPickData step added below is given the test samples and
        # their pseudo-labels to draw from.
        pseudo_size = len(y_train) // 2
        X_train = np.array(list(X_train) + [None] * pseudo_size)
        y_train = np.array(list(y_train) + [None] * pseudo_size)
        X_test = _data.load_test_data()
        _, pi = tk.ml.cv_indices(
            X_test, np.zeros((len(X_test),)),
            cv_count=CV_COUNT,
            cv_index=args.cv_index,
            split_seed=split_seed,
            stratify=False)
        # Cross-pseudo-labeling: labels come from the *next* fold's
        # predictions of the stack_res model (previously this module's own
        # predict_all was used instead).
        import stack_res
        neighbour_fold = (args.cv_index + 1) % CV_COUNT
        pred_test = stack_res.predict_all('test', None, use_cache=True)[neighbour_fold]
        gen.add(tk.generator.RandomPickData(X_test[pi], pred_test[pi]))
    gen.add(tk.image.RandomFlipLR(probability=0.5, with_output=True))
    gen.add(tk.image.Padding(probability=1, with_output=True))
    gen.add(tk.image.RandomRotate(probability=0.25, with_output=True))
    gen.add(tk.image.RandomCrop(probability=1, with_output=True))
    gen.add(tk.image.RandomAugmentors([
        tk.image.RandomBlur(probability=0.125),
        tk.image.RandomUnsharpMask(probability=0.125),
        tk.image.RandomBrightness(probability=0.25),
        tk.image.RandomContrast(probability=0.25),
    ], probability=0.125))
    gen.add(tk.image.Resize((101, 101), with_output=True))

    model = tk.dl.models.Model(network, gen, batch_size=BATCH_SIZE)
    if fine:
        # Fine-tuning starts from the weights saved by the regular run.
        model.load_weights(MODELS_DIR / f'model.fold{args.cv_index}.h5')

    learning_rate = 0.001 / 128 if fine else 0.1 / 128
    epoch_count = EPOCHS // 3 if fine else EPOCHS
    model.compile(
        sgd_lr=learning_rate,
        loss=tk.dl.losses.lovasz_hinge_elup1,
        metrics=[tk.dl.metrics.binary_accuracy],
        lr_multipliers=lr_multipliers,
        clipnorm=10.0)
    model.fit(
        X_train, y_train,
        validation_data=(X_val, y_val),
        epochs=epoch_count,
        cosine_annealing=True,
        mixup=False)
    model.save(MODELS_DIR / f'model.fold{args.cv_index}.h5', include_optimizer=False)

    # Evaluation on the held-out fold is only done where is_master() is true.
    if tk.dl.hvd.is_master():
        _evaluation.log_evaluation(y_val, model.predict(X_val))
def load_train_data():
    """Return the training dataset exactly as ``_data.load_train_data`` provides it."""
    return _data.load_train_data()
def load_test_data():
    """Yield one dataset per entry of the "test" meta-features.

    For every item returned by ``load_mf("test")`` a dataset whose ``data``
    attribute is that item is yielded.

    Each yielded dataset is a separate shallow copy of the base dataset.
    Previously a single object was mutated and re-yielded, so collecting the
    generator (e.g. ``list(load_test_data())``) produced N references that
    all carried the last feature set.

    Yields:
        A dataset object with ``data`` replaced by one meta-feature entry.
    """
    import copy

    mf = load_mf("test")
    # NOTE(review): the base dataset comes from _data.load_train_data() even
    # though this is the test loader — confirm this is intended and not a
    # copy-paste slip (everything except ``data`` is the training dataset's).
    base_dataset = _data.load_train_data()
    for mf_i in mf:
        dataset = copy.copy(base_dataset)
        dataset.data = mf_i
        yield dataset
def load_train_data():
    """Return the training dataset with its ``data`` replaced by meta-features.

    The dataset comes from ``_data.load_train_data()``; its ``data`` attribute
    is overwritten with the result of ``load_mf("train")``.
    """
    train_set = _data.load_train_data()
    train_features = load_mf("train")
    train_set.data = train_features
    return train_set