Ejemplo n.º 1
0
def predict_all(data_name, X, use_cache=False, child_cv_index=None):
    """Predict with every CV fold model, averaging each with a flipped pass.

    Args:
        data_name: Dataset tag. ``'val'`` and ``'test'`` are special-cased;
            the value also names the cache sub-directory.
        X: Input samples, forwarded to ``_mf.get_meta_features``.
        use_cache: If true, return the dumped result when the cache file
            exists. Only honoured when ``child_cv_index`` is ``None``.
        child_cv_index: Child fold index forwarded to
            ``_mf.get_meta_features``. ``None`` means the top-level call.

    Returns:
        - ``'test'`` with ``child_cv_index=None``: a list of five arrays (one
          per child fold), each the mean over CV folds of the boolean masks
          obtained by comparing predictions against estimated thresholds.
        - ``'val'``: a float32 array of shape ``(len(X), 101, 101, 1)`` filled
          from each fold's predictions at that fold's validation indices.
        - otherwise: a list holding one prediction array per CV fold.
    """
    is_top_level = child_cv_index is None
    cache_path = CACHE_DIR / data_name / f'{MODEL_NAME}.pkl'
    if use_cache and is_top_level and cache_path.is_file():
        return joblib.load(cache_path)

    if is_top_level and data_name == 'test':
        test_cache = CACHE_DIR / 'test'
        pred_bin = np.median(joblib.load(test_cache / 'bin_nas.pkl'), axis=0)
        pred_reg = np.median(joblib.load(test_cache / 'reg_nas.pkl'), axis=0)
        ath_estimator = joblib.load(MODELS_DIR / 'ath_estimator.pkl')

        def _binarize(fold_pred):
            # One estimated threshold per sample, broadcast over the map.
            features = _ath.create_input_data(fold_pred, pred_bin, pred_reg)
            cutoffs = ath_estimator.predict(features)
            return fold_pred > np.reshape(cutoffs, (len(fold_pred), 1, 1, 1))

        result = []
        for child in range(5):
            # Recurse once per child fold; each call yields per-fold preds.
            masks = [
                _binarize(fold_pred)
                for fold_pred in predict_all(data_name, X, use_cache, child)
            ]
            result.append(np.mean(masks, axis=0))
        cache_path.parent.mkdir(parents=True, exist_ok=True)
        joblib.dump(result, cache_path, compress=3)
        return result

    if data_name == 'val':
        features = _mf.get_meta_features(data_name, X)
        split_seed = int((MODELS_DIR / 'split_seed.txt').read_text())
        val_indices = []
        for fold in range(CV_COUNT):
            _, idx = tk.ml.cv_indices(features,
                                      None,
                                      cv_count=CV_COUNT,
                                      cv_index=fold,
                                      split_seed=split_seed,
                                      stratify=False)
            val_indices.append(idx)
        # Each fold model only sees its own validation subset.
        fold_inputs = [features[idx] for idx in val_indices]
    else:
        features = _mf.get_meta_features(data_name, X, child_cv_index)
        # Every fold model predicts on the same full input.
        fold_inputs = [features] * CV_COUNT

    model = tk.dl.models.Model.load(MODELS_DIR / 'model.fold0.h5',
                                    tk.generator.SimpleGenerator(),
                                    batch_size=BATCH_SIZE,
                                    multi_gpu=True)

    fold_preds = []
    for fold in tk.tqdm(range(CV_COUNT), desc='predict'):
        if fold > 0:
            # Fold 0 weights are already in place from Model.load above.
            model.load_weights(MODELS_DIR / f'model.fold{fold}.h5')

        batch = fold_inputs[fold]
        plain = model.predict(batch, verbose=0)
        # Predict on the input reversed along axis 2, then reverse it back.
        mirrored = model.predict(batch[:, :, ::-1, :],
                                 verbose=0)[:, :, ::-1, :]
        fold_preds.append(np.mean([plain, mirrored], axis=0))

    if data_name == 'val':
        result = np.empty((len(X), 101, 101, 1), dtype=np.float32)
        for idx, fold_pred in zip(val_indices, fold_preds):
            result[idx] = fold_pred
    else:
        result = fold_preds

    if data_name != 'test':
        cache_path.parent.mkdir(parents=True, exist_ok=True)
        joblib.dump(result, cache_path, compress=3)
    return result
Ejemplo n.º 2
0
def _main():
    """Save plots of the prior boxes assigned to ground truth.

    Takes the first sample returned by ``tk.data.voc.load_07_test``, draws 32
    augmented batches from the generator and, for every item, writes an image
    of the assigned prior boxes into the ``___assign_check`` directory next
    to this file.
    """
    here = pathlib.Path(__file__).resolve().parent
    out_dir = here / '___assign_check'
    out_dir.mkdir(exist_ok=True)

    X_val, y_val = tk.data.voc.load_07_test(
        here / 'pytoolkit' / 'data' / 'VOCdevkit')
    X_val = X_val[:1]
    y_val = y_val[:1]

    od = tk.dl.od.ObjectDetector.load_voc(batch_size=1, use_multi_gpu=False)

    gen = tk.dl.od.od_gen.create_generator(
        (512, 512),
        preprocess_input=lambda x: x,
        encode_truth=od.pb.encode_truth)
    batches, _ = gen.flow(X_val, y_val, data_augmentation=True)
    for i, (X_batch, y_batch) in zip(tk.tqdm(range(32)), batches):
        for rgb, encoded in zip(X_batch, y_batch):
            # Everything that was assigned to a ground-truth object.
            assigned = encoded[:, 1] == 1
            class_ids = np.argmax(encoded[assigned, 2:-4], axis=-1)
            # Decode all-zero locs to get the prior boxes' own coordinates.
            zero_locs = np.zeros((len(encoded), 4))
            prior_boxes = od.pb.decode_locs(zero_locs, xp=np)[assigned, :]

            plotted = tk.ml.plot_objects(rgb, class_ids, None, prior_boxes,
                                         tk.data.voc.CLASS_NAMES)
            tk.ndimage.save(out_dir / f'{i}.jpg', plotted)
Ejemplo n.º 3
0
def _load_image(X):
    """Load grayscale images as float32 with a trailing channel axis.

    Args:
        X: Iterable of image paths; each is passed through ``str()``.

    Returns:
        The decoded images stacked into one array with a new last axis
        (assumes all images share one shape -- TODO confirm with callers).

    Raises:
        OSError: If OpenCV cannot read one of the files.
    """
    images = []
    for p in tk.tqdm(X, desc='load'):
        img = cv2.imread(str(p), cv2.IMREAD_GRAYSCALE)
        if img is None:
            # cv2.imread reports a missing or undecodable file by returning
            # None instead of raising; fail here with the offending path.
            raise OSError(f'failed to read image: {p}')
        images.append(img.astype(np.float32))
    return np.expand_dims(np.array(images), axis=-1)
Ejemplo n.º 4
0
def create_data(y, pred, pred_bin, pred_reg):
    """Create training data for the adaptive threshold estimator.

    Args:
        y: Ground-truth masks; reduced over axes 1-3, so expected to be 4-D.
        pred: Predicted maps, indexed in parallel with ``y``.
        pred_bin: Forwarded unchanged to ``create_input_data``.
        pred_reg: Forwarded unchanged to ``create_input_data``.

    Returns:
        Tuple ``(threshold_X, threshold_y)``: the features produced by
        ``create_input_data`` and one target threshold per sample.
    """
    threshold_X = create_input_data(pred, pred_bin, pred_reg)

    mask_neg = y.max(axis=(1, 2, 3)) == 0
    mask_pos = np.logical_not(mask_neg)
    y_pos = y > 0.5

    threshold_y = np.empty((len(y), ))

    # Empty ground truth: aim halfway between the largest predicted value
    # and 1, clamped to the allowed threshold range.
    th_neg = (pred[mask_neg].max(axis=(1, 2, 3)) + 1) / 2
    threshold_y[mask_neg] = np.clip(th_neg, MIN_THRESHOLD, MAX_THRESHOLD)

    # The candidate grid is identical for every sample, so build it once.
    threshold_list = np.linspace(MIN_THRESHOLD, MAX_THRESHOLD, 1000)
    for i in tk.tqdm(np.where(mask_pos)[0], desc='ath'):
        iou_list = []
        for th in threshold_list:
            pred_pos = pred[i] > th
            inter = np.logical_and(pred_pos, y_pos[i])
            union = np.logical_or(pred_pos, y_pos[i])
            # max(..., 1) guards against dividing by an empty union.
            iou_list.append(np.sum(inter) / max(np.sum(union), 1))
        # argmax keeps the first (lowest) threshold among ties.
        threshold_y[i] = threshold_list[np.argmax(iou_list)]

    return threshold_X, threshold_y
Ejemplo n.º 5
0
def predict_all(data_name, X, use_cache=False, child_cv_index=None):
    """Predict with every CV fold model on the meta features of ``X``.

    Args:
        data_name: Dataset tag. ``'val'`` and ``'test'`` are special-cased;
            the value also names the cache sub-directory.
        X: Input samples, forwarded to ``get_meta_features``.
        use_cache: If true, return the dumped result when the cache file
            exists. Only honoured when ``child_cv_index`` is ``None``.
        child_cv_index: Child fold index forwarded to ``get_meta_features``.
            ``None`` means the top-level call.

    Returns:
        - ``'test'`` with ``child_cv_index=None``: a list with the result of
          one recursive call per child fold index 0-4.
        - ``'val'``: a float32 array of shape ``(len(X), 101, 101, 1)`` filled
          from each fold's predictions at that fold's validation indices.
        - otherwise: the mean of the per-fold predictions.
    """
    top_level = child_cv_index is None
    cache_path = CACHE_DIR / data_name / f'{MODEL_NAME}.pkl'
    if use_cache and top_level and cache_path.is_file():
        return joblib.load(cache_path)

    if top_level and data_name == 'test':
        result = []
        for child in range(5):
            result.append(predict_all(data_name, X, use_cache, child))
        cache_path.parent.mkdir(parents=True, exist_ok=True)
        joblib.dump(result, cache_path, compress=3)
        return result

    if data_name == 'val':
        features = get_meta_features(data_name, X)
        split_seed = int((MODELS_DIR / 'split_seed.txt').read_text())
        val_indices = []
        for fold in range(CV_COUNT):
            _, idx = tk.ml.cv_indices(features,
                                      None,
                                      cv_count=CV_COUNT,
                                      cv_index=fold,
                                      split_seed=split_seed,
                                      stratify=False)
            val_indices.append(idx)
        # Each fold model only sees its own validation subset.
        fold_inputs = [features[idx] for idx in val_indices]
    else:
        features = get_meta_features(data_name, X, child_cv_index)
        # Every fold model predicts on the same full input.
        fold_inputs = [features] * CV_COUNT

    model = tk.dl.models.Model.load(MODELS_DIR / 'model.fold0.h5',
                                    tk.generator.SimpleGenerator(),
                                    batch_size=BATCH_SIZE,
                                    multi_gpu=True)

    fold_preds = []
    for fold in tk.tqdm(range(CV_COUNT), desc='predict'):
        if fold > 0:
            # Fold 0 weights are already in place from Model.load above.
            model.load_weights(MODELS_DIR / f'model.fold{fold}.h5')
        fold_preds.append(model.predict(fold_inputs[fold], verbose=0))

    if data_name == 'val':
        result = np.empty((len(X), 101, 101, 1), dtype=np.float32)
        for idx, fold_pred in zip(val_indices, fold_preds):
            result[idx] = fold_pred
    else:
        result = np.mean(fold_preds, axis=0)

    if data_name != 'test':
        cache_path.parent.mkdir(parents=True, exist_ok=True)
        joblib.dump(result, cache_path, compress=3)
    return result
Ejemplo n.º 6
0
def get_score_fixed_threshold(y_val, pred_val, search_th=True):
    """Compute a rough score using one threshold for all samples.

    Args:
        y_val: Ground truth; binarised with ``> 0.5`` before scoring.
        pred_val: Predictions; binarised with the chosen threshold.
        search_th: If true, try 100 evenly spaced thresholds in [0.3, 0.7]
            and keep the best-scoring one; otherwise use 0.5.

    Returns:
        Tuple ``(score, threshold)``.
    """
    if not search_th:
        threshold = 0.5
        return compute_score(y_val > 0.5, pred_val > threshold), threshold

    candidates = np.linspace(0.3, 0.7, 100)
    scores = [
        compute_score(y_val > 0.5, pred_val > th)
        for th in tk.tqdm(candidates, desc='threshold')
    ]
    # argmax returns the first maximum, i.e. the lowest tied threshold.
    best = np.argmax(scores)
    return scores[best], candidates[best]
Ejemplo n.º 7
0
def predict_all(data_name, X, use_cache=False):
    """Predict with every CV fold model using ``predict_tta`` in 'bin' mode.

    Args:
        data_name: Dataset tag. ``'val'`` selects out-of-fold prediction on
            ``X``; the value also names the cache sub-directory.
        X: Input samples. Only used for ``'val'``; for any other
            ``data_name`` it is replaced by ``_data.load_test_data()``.
        use_cache: If true, return the dumped result when the cache file
            exists.

    Returns:
        For ``'val'``, a float32 array of shape ``(len(X), 1)`` filled from
        each fold's predictions at that fold's validation indices; otherwise
        a list holding one prediction array per CV fold. The result is
        always dumped to the cache file.
    """
    cache_path = CACHE_DIR / data_name / f'{MODEL_NAME}.pkl'
    if use_cache and cache_path.is_file():
        return joblib.load(cache_path)

    is_val = data_name == 'val'
    if is_val:
        split_seed = int((MODELS_DIR / 'split_seed.txt').read_text())
        val_indices = []
        for fold in range(CV_COUNT):
            _, idx = tk.ml.cv_indices(X,
                                      None,
                                      cv_count=CV_COUNT,
                                      cv_index=fold,
                                      split_seed=split_seed,
                                      stratify=False)
            val_indices.append(idx)
        # Each fold model only sees its own validation subset.
        fold_inputs = [X[idx] for idx in val_indices]
    else:
        X = _data.load_test_data()
        # Every fold model predicts on the same full input.
        fold_inputs = [X] * CV_COUNT

    model = tk.dl.models.Model.load(MODELS_DIR / 'model.fold0.h5',
                                    tk.generator.SimpleGenerator(),
                                    batch_size=BATCH_SIZE,
                                    multi_gpu=True)

    fold_preds = []
    for fold in tk.tqdm(range(CV_COUNT), desc='predict'):
        if fold > 0:
            # Fold 0 weights are already in place from Model.load above.
            model.load_weights(MODELS_DIR / f'model.fold{fold}.h5')
        fold_preds.append(
            _evaluation.predict_tta(model, fold_inputs[fold], mode='bin'))

    if is_val:
        result = np.empty((len(X), 1), dtype=np.float32)
        for idx, fold_pred in zip(val_indices, fold_preds):
            result[idx] = fold_pred
    else:
        result = fold_preds

    cache_path.parent.mkdir(parents=True, exist_ok=True)
    joblib.dump(result, cache_path, compress=3)
    return result
Ejemplo n.º 8
0
def _main():
    """Save plots of augmented generator output for visual inspection.

    Takes the first sample returned by ``tk.data.voc.load_07_test``, draws 32
    augmented batches from the generator (ground truth left unencoded) and
    writes each item, with its boxes drawn, into the ``___generator_check``
    directory next to this file.
    """
    here = pathlib.Path(__file__).resolve().parent
    out_dir = here / '___generator_check'
    out_dir.mkdir(exist_ok=True)

    X_val, y_val = tk.data.voc.load_07_test(
        here / 'pytoolkit' / 'data' / 'VOCdevkit')
    X_val = X_val[:1]
    y_val = y_val[:1]

    gen = tk.dl.od.od_gen.create_generator(
        (512, 512),
        preprocess_input=lambda x: x,
        encode_truth=None)
    batches, _ = gen.flow(X_val, y_val, data_augmentation=True)
    for i, (X_batch, y_batch) in zip(tk.tqdm(range(32)), batches):
        for rgb, truth in zip(X_batch, y_batch):
            plotted = tk.ml.plot_objects(rgb, truth.classes, None,
                                         truth.bboxes,
                                         tk.data.voc.CLASS_NAMES)
            tk.ndimage.save(out_dir / f'{i}.jpg', plotted)
Ejemplo n.º 9
0
def log_evaluation(y_val,
                   pred_val,
                   print_fn=None,
                   search_th=False,
                   threshold=None):
    """Log validation metrics and return the threshold that was applied.

    Args:
        y_val: Ground truth; binarised with ``> 0.5`` before scoring.
        pred_val: Predictions, binarised with the threshold.
        print_fn: Callable receiving each log line; defaults to the ``info``
            method of this module's logger.
        search_th: If true, try 100 evenly spaced thresholds in [0.3, 0.7]
            and use the best-scoring one. ``threshold`` must then be ``None``.
        threshold: Optional per-sample thresholds of shape
            ``(len(pred_val),)``; when omitted (and not searching) 0.5 is
            used.

    Returns:
        The threshold used: the searched value, 0.5, or the given array
        reshaped to ``(len(pred_val), 1, 1, 1)``.
    """
    if not print_fn:
        print_fn = tk.log.get(__name__).info

    # Accuracy and related classification metrics.
    tk.ml.print_classification_metrics(np.ravel(y_val),
                                       np.ravel(pred_val),
                                       print_fn=print_fn)

    if search_th:
        # Threshold search, with a sampled table of the scores.
        assert threshold is None
        candidates = np.linspace(0.3, 0.7, 100)
        scores = [
            compute_score(np.int32(y_val > 0.5), np.int32(pred_val > th))
            for th in tk.tqdm(candidates, desc='threshold')
        ]
        best = np.argmax(scores)
        print_fn('scores:')
        # Only every 10th candidate is printed to keep the log short.
        for cand, cand_score in zip(candidates[::10], scores[::10]):
            print_fn(f'  threshold={cand:.3f}: score={cand_score:.3f}')
        threshold = candidates[best]
        best_score = scores[best]
        print_fn(f'max score: {best_score:.3f} (threshold: {threshold:.3f})')
    elif threshold is None:
        threshold = 0.5
        fixed_score = compute_score(np.int32(y_val > 0.5),
                                    np.int32(pred_val > threshold))
        print_fn(f'score: {fixed_score:.3f}')
    else:
        assert threshold.shape == (len(pred_val), )
        # Reshape so each sample's threshold broadcasts over its map.
        threshold = threshold.reshape((len(pred_val), 1, 1, 1))
        print_fn(f'mean threshold: {np.mean(threshold):.3f}')
        adaptive_score = compute_score(np.int32(y_val > 0.5),
                                       np.int32(pred_val > threshold))
        print_fn(f'score: {adaptive_score:.3f}')

    # Home-grown metrics.
    print_metrics(y_val > 0.5, pred_val > threshold, print_fn=print_fn)

    return threshold
Ejemplo n.º 10
0
def save_submission(save_path, pred):
    """Write the submission CSV and log how many predictions are empty.

    Args:
        save_path: Destination of the CSV; converted with ``str()``.
        pred: Predicted masks, indexed in the same order as the ``id``
            column of the file at ``TEST_PATH``. Reduced over axes 1-3, so
            expected to be 4-D.
    """
    # Write the submission file: one run-length-encoded mask per id.
    id_list = pd.read_csv(TEST_PATH)['id'].values
    pred_dict = {
        id_: _encode_rl(pred[i])
        for i, id_ in enumerate(tk.tqdm(id_list, desc='encode_rl'))
    }
    df = pd.DataFrame.from_dict(pred_dict, orient='index')
    df.index.names = ['id']
    df.columns = ['rle_mask']
    df.to_csv(str(save_path))

    # Analyse the result: per-sample maximum, 0 or 1.
    pred_bin = np.max(pred, axis=(1, 2, 3)).astype(np.uint8)
    total = len(pred_bin)
    # Cast the unsigned NumPy sum to int: mixing it with a Python int can
    # promote to float64 on NumPy 1.x and log the count as e.g. "12.0".
    empty_count = total - int(pred_bin.sum())
    logger = tk.log.get(__name__)
    logger.info(
        f'empty rate: {empty_count}/{total} = {100 * empty_count / total:.1f}%'
    )