Example #1
0
def _predict():
    """Predict on the test set and write a thresholded submission.

    Reads the tuned cutoff from ``threshold.txt`` and binarizes the
    blended test predictions with it before saving.
    """
    logger = tk.log.get(__name__)
    threshold = float((MODELS_DIR / 'threshold.txt').read_text())
    logger.info(f'threshold = {threshold:.3f}')
    X_test = _data.load_test_data()
    binarized = predict_all('test', X_test) > threshold
    _data.save_submission(MODELS_DIR / 'submission.csv', binarized)
Example #2
0
def predict():
    """Blend the cached per-model predictions and save the result."""
    logger.info(f"source_models = {source_models}")
    test_set = _data.load_test_data()
    # Load each source model's cached prediction, then take the weighted mean.
    member_preds = [_load_pred(name) for name in model_names]
    pred = np.average(member_preds, weights=model_weights, axis=0)
    _data.save_prediction(models_dir, test_set, pred)
Example #3
0
def _predict():
    """Predict on the test set and write submissions at several cutoffs.

    Writes ``submission.csv`` at the default 0.50 threshold first (the
    canonical entry), then one ``submission_<t>.csv`` per alternative
    threshold so the best cutoff can be chosen after the fact.
    """
    X_test = _data.load_test_data()
    pred = predict_all('test', X_test)
    # Canonical submission at the default cutoff, written first as before.
    _data.save_submission(MODELS_DIR / 'submission.csv', pred > 0.50)
    # Threshold sweep for offline cutoff selection; filenames match the
    # originals ('submission_0.40.csv', ...) via the .2f format.
    for threshold in (0.40, 0.45, 0.55, 0.60):
        _data.save_submission(MODELS_DIR / f'submission_{threshold:.2f}.csv',
                              pred > threshold)
Example #4
0
def _train(args, fine=False):
    """Train one CV fold of the model.

    Args:
        args: parsed CLI arguments; only ``args.cv_index`` (the fold number)
            is read here.
        fine: when True, fine-tune from the fold's previously saved weights
            with a lower learning rate, fewer epochs, and pseudo-labeled
            test data mixed into the training set.
    """
    logger = tk.log.get(__name__)
    logger.info(f'args: {args}')

    # Derive a deterministic split seed from the model name so every run
    # (and every fold) of this model shares the same CV partition; persist
    # it so predict_all can rebuild the identical split later.
    split_seed = int(MODEL_NAME.encode('utf-8').hex(), 16) % 10000000
    MODELS_DIR.mkdir(parents=True, exist_ok=True)
    (MODELS_DIR / 'split_seed.txt').write_text(str(split_seed))

    X, y = _data.load_train_data()
    ti, vi = tk.ml.cv_indices(X, y, cv_count=CV_COUNT, cv_index=args.cv_index, split_seed=split_seed, stratify=False)
    (X_train, y_train), (X_val, y_val) = (X[ti], y[ti]), (X[vi], y[vi])
    logger.info(f'cv_index={args.cv_index}: train={len(y_train)} val={len(y_val)}')

    network, lr_multipliers = _create_network()

    gen = tk.generator.Generator()
    if fine:
        # Pseudo-labeling: extend the training arrays with None placeholders
        # (half the training size); RandomPickData below substitutes test
        # samples and their predicted labels for those slots on the fly.
        pseudo_size = len(y_train) // 2
        X_train = np.array(list(X_train) + [None] * pseudo_size)
        y_train = np.array(list(y_train) + [None] * pseudo_size)
        X_test = _data.load_test_data()
        _, pi = tk.ml.cv_indices(X_test, np.zeros((len(X_test),)), cv_count=CV_COUNT, cv_index=args.cv_index, split_seed=split_seed, stratify=False)
        #pred_test = predict_all('test', None, use_cache=True)[(args.cv_index + 1) % CV_COUNT]  # cross-pseudo-labeling
        # Pseudo-labels come from the stacked model and from a *different*
        # fold ((cv_index + 1) % CV_COUNT) so a fold never trains on its
        # own predictions ("cross-pseudo-labeling").
        import stack_res
        pred_test = stack_res.predict_all('test', None, use_cache=True)[(args.cv_index + 1) % CV_COUNT]  # cross-pseudo-labeling
        gen.add(tk.generator.RandomPickData(X_test[pi], pred_test[pi]))
    # Augmentation pipeline; with_output=True applies the same geometric
    # transform to the target mask as to the input image.
    gen.add(tk.image.RandomFlipLR(probability=0.5, with_output=True))
    gen.add(tk.image.Padding(probability=1, with_output=True))
    gen.add(tk.image.RandomRotate(probability=0.25, with_output=True))
    gen.add(tk.image.RandomCrop(probability=1, with_output=True))
    gen.add(tk.image.RandomAugmentors([
        tk.image.RandomBlur(probability=0.125),
        tk.image.RandomUnsharpMask(probability=0.125),
        tk.image.RandomBrightness(probability=0.25),
        tk.image.RandomContrast(probability=0.25),
    ], probability=0.125))
    gen.add(tk.image.Resize((101, 101), with_output=True))

    model = tk.dl.models.Model(network, gen, batch_size=BATCH_SIZE)
    if fine:
        # Fine-tuning resumes from this fold's saved weights.
        model.load_weights(MODELS_DIR / f'model.fold{args.cv_index}.h5')
    # Lower LR and fewer epochs in fine mode; LRs are scaled per batch of 128.
    model.compile(sgd_lr=0.001 / 128 if fine else 0.1 / 128, loss=tk.dl.losses.lovasz_hinge_elup1,
                  metrics=[tk.dl.metrics.binary_accuracy], lr_multipliers=lr_multipliers, clipnorm=10.0)
    model.fit(
        X_train, y_train, validation_data=(X_val, y_val),
        epochs=EPOCHS // 3 if fine else EPOCHS,
        cosine_annealing=True, mixup=False)
    model.save(MODELS_DIR / f'model.fold{args.cv_index}.h5', include_optimizer=False)

    # Only the Horovod master process logs evaluation to avoid duplicates.
    if tk.dl.hvd.is_master():
        _evaluation.log_evaluation(y_val, model.predict(X_val))
Example #5
0
def predict_all(data_name, X, use_cache=False):
    """Predict with every CV fold's model.

    Args:
        data_name: ``'val'`` for out-of-fold validation predictions;
            anything else is treated as the test set (``X`` is then
            ignored and the test data is reloaded internally).
        X: input array, used only when ``data_name == 'val'``.
        use_cache: if True and a cached pickle exists, return it as-is.

    Returns:
        For ``'val'``: a single ``(len(X), 1)`` float32 array of
        out-of-fold predictions aligned with ``X``.
        For test: a list with one prediction array per fold.
    """
    cache_path = CACHE_DIR / data_name / f'{MODEL_NAME}.pkl'
    if use_cache and cache_path.is_file():
        return joblib.load(cache_path)

    if data_name == 'val':
        # Rebuild the exact CV split used at training time (seed persisted
        # by _train) so each fold predicts only its own validation slice.
        X_list, vi_list = [], []
        split_seed = int((MODELS_DIR / 'split_seed.txt').read_text())
        for cv_index in range(CV_COUNT):
            _, vi = tk.ml.cv_indices(X,
                                     None,
                                     cv_count=CV_COUNT,
                                     cv_index=cv_index,
                                     split_seed=split_seed,
                                     stratify=False)
            X_list.append(X[vi])
            vi_list.append(vi)
    else:
        # Test mode: every fold predicts the full test set.
        X = _data.load_test_data()
        X_list = [X] * CV_COUNT

    gen = tk.generator.SimpleGenerator()
    # Load the model once from fold 0; later folds only swap the weights,
    # which is much cheaper than rebuilding the graph each time.
    model = tk.dl.models.Model.load(MODELS_DIR / f'model.fold0.h5',
                                    gen,
                                    batch_size=BATCH_SIZE,
                                    multi_gpu=True)

    pred_list = []
    for cv_index in tk.tqdm(range(CV_COUNT), desc='predict'):
        if cv_index != 0:
            model.load_weights(MODELS_DIR / f'model.fold{cv_index}.h5')

        X_t = X_list[cv_index]
        pred = _evaluation.predict_tta(model, X_t, mode='bin')
        pred_list.append(pred)

    if data_name == 'val':
        # Scatter each fold's predictions back to the original row order.
        pred = np.empty((len(X), 1), dtype=np.float32)
        for vi, p in zip(vi_list, pred_list):
            pred[vi] = p
    else:
        pred = pred_list

    cache_path.parent.mkdir(parents=True, exist_ok=True)
    joblib.dump(pred, cache_path, compress=3)
    return pred
Example #6
0
def load_test_data():
    """Return the test dataset (thin passthrough to ``_data``)."""
    return _data.load_test_data()
Example #7
0
def _predict():
    """Average the per-fold test predictions and save a binary submission."""
    test_inputs = _data.load_test_data()
    fold_preds = predict_all('test', test_inputs)
    # Mean over folds, then binarize at the fixed 0.5 cutoff.
    submission = np.mean(fold_preds, axis=0) > 0.5
    _data.save_submission(MODELS_DIR / 'submission.csv', submission)
Example #8
0
def _predict():
    """Run test-set prediction; output handling is delegated to predict_all."""
    predict_all('test', _data.load_test_data())
Example #9
0
def predict():
    """Load the trained model, average its predictions, and save them."""
    test_set = _data.load_test_data()
    model = create_model().load(models_dir)
    # predict_all yields one array per member; reduce by simple mean.
    pred = np.mean(model.predict_all(test_set), axis=0)
    _data.save_prediction(models_dir, test_set, pred)