def load_train_classifier(params, features, labels, feature_names, sizes,
                          nb_holdout):
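    """ select relevant features, load a previously trained classifier if one
    exists, otherwise train a new one, then evaluate it with cross-validation

    :param dict params: experiment parameters
    :param ndarray features: computed feature vectors
    :param ndarray labels: sample labels
    :param list(str) feature_names: names of the features
    :param list(int) sizes: numbers of samples per image set
    :param int nb_holdout: number of image sets held out in each CV fold
    :return (dict, obj, str): updated parameters, classifier and its path
    """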
    logging.info('train classifier...')
    seg_clf.feature_scoring_selection(features, labels, feature_names,
                                      path_out=params['path_exp'])
    cv = seg_clf.CrossValidatePSetsOut(sizes, nb_hold_out=nb_holdout)
    # feature norm & train classification
    fname_classif = seg_clf.TEMPLATE_NAME_CLF.format(params['classif'])
    path_classif = os.path.join(params['path_exp'], fname_classif)
    if os.path.isfile(path_classif) and not FORCE_RETRAIN_CLASSIF:
        logging.info('loading classifier: %s', path_classif)
        params_local = params.copy()
        dict_classif = seg_clf.load_classifier(path_classif)
        classif = dict_classif['clf_pipeline']
        params = dict_classif['params']
        params.update({k: params_local[k] for k in params_local
                       if k.startswith('path_') or k.startswith('gc_')})
        logging.debug('loaded PARAMETERS: %s', repr(params))
    else:
        classif, path_classif = seg_clf.create_classif_train_export(
            params['classif'], features, labels, cross_val=cv, params=params,
            feature_names=feature_names,
            nb_search_iter=params['nb_classif_search'],
            nb_jobs=params['nb_jobs'], pca_coef=params['pca_coef'],
            path_out=params['path_exp'])
    params['path_classif'] = path_classif
    # re-create the cross-validation object, since the previous one may have
    # been consumed during the classifier search/training above
    cv = seg_clf.CrossValidatePSetsOut(sizes, nb_hold_out=nb_holdout)
    seg_clf.eval_classif_cross_val_scores(params['classif'], classif, features,
                                          labels, cross_val=cv,
                                          path_out=params['path_exp'])
    seg_clf.eval_classif_cross_val_roc(params['classif'], classif, features,
                                       labels, cross_val=cv,
                                       path_out=params['path_exp'])
    return params, classif, path_classif
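

# A minimal usage sketch for `load_train_classifier` on synthetic data; the
# classifier name, parameter values and output path below are hypothetical
# placeholders, not verified defaults of this project.
def _example_load_train_classifier():
    features = np.random.random((120, 4))
    labels = np.random.randint(0, 2, 120)
    params = {'classif': 'RandForest', 'path_exp': '/tmp/experiment',
              'nb_classif_search': 5, 'nb_jobs': 1, 'pca_coef': None}
    # three image sets of 40 samples each; `nb_holdout` sets form each test fold
    return load_train_classifier(params, features, labels,
                                 feature_names=['a', 'b', 'c', 'd'],
                                 sizes=[40, 40, 40], nb_holdout=1)

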
def retrain_loo_segment_image(imgs_idx_path, path_classif, path_dump,
                              path_out, path_visu):
    """ load the classifier, and dumped data, subtract the image,
    retrain the classif. without it and do the segmentation

    :param str path_img: path to input image
    :param str path_classif: path to saved classifier
    :param str path_dump: path to dumped data
    :param, str path_out: path to segmentation outputs
    :return (str, ndarray, ndarray):
    """
    idx, path_img = parse_imgs_idx_path(imgs_idx_path)
    dict_imgs, _, _, dict_features, dict_labels, _, _ = \
        load_dump_data(path_dump)
    dict_classif = seg_clf.load_classifier(path_classif)
    classif = dict_classif['clf_pipeline']
    params = dict_classif['params']

    idx_name = get_idx_name(idx, path_img)
    for d in [dict_features, dict_labels]:
        _ = d.pop(idx_name, None)
    assert (len(dict_imgs) - len(dict_features)) == 1, \
        'exactly one image should be dropped from the training set'

    features, labels, _ = seg_clf.convert_set_features_labels_2_dataset(
        dict_features, dict_labels, balance_type=params['balance'], drop_labels=[-1])
    classif.fit(features, labels)

    idx_name, segm, segm_gc = segment_image(imgs_idx_path, params, classif,
                                            path_out, path_visu)
    # gc.collect(), time.sleep(1)
    return idx_name, segm, segm_gc
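

# Usage sketch for the leave-one-out retraining, assuming the (index, path)
# form accepted by `parse_imgs_idx_path`; all file locations are hypothetical.
def _example_retrain_loo_segment_image():
    return retrain_loo_segment_image(
        (0, '/tmp/experiment/images/img_00.png'),
        path_classif='/tmp/experiment/classifier_RandForest.pkl',
        path_dump='/tmp/experiment/dump_data.npz',
        path_out='/tmp/experiment/segments',
        path_visu='/tmp/experiment/visual')

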
def main_predict(path_classif,
                 path_pattern_imgs,
                 path_out,
                 name='SEGMENT___',
                 params_local=None):
    """ given trained classifier segment new images

    :param str path_classif:
    :param str path_pattern_imgs:
    :param str path_out:
    :param str name:
    """
    logging.getLogger().setLevel(logging.INFO)
    logging.info('running PREDICTION...')
    assert path_pattern_imgs is not None, 'missing a pattern for input images'

    dict_classif = seg_clf.load_classifier(path_classif)
    classif = dict_classif['clf_pipeline']
    params = dict_classif['params']
    if params_local is not None:
        params.update({
            k: params_local[k]
            for k in params_local
            if k.startswith('path_') or k.startswith('gc_')
        })

    path_out, path_visu = prepare_output_dir(
        path_pattern_imgs, path_out, name, visual=params.get('visual', False))
    tl_expt.set_experiment_logger(path_out)
    logging.info(tl_expt.string_dict(params, desc='PARAMETERS'))

    paths_img = sorted(glob.glob(path_pattern_imgs))
    logging.info('found %i images on path "%s"', len(paths_img),
                 path_pattern_imgs)

    logging.debug('run prediction...')
    show_debug_imgs = params.get('visual', False)
    _wrapper_segment = partial(
        try_segment_image,
        params=params,
        classif=classif,
        path_out=path_out,
        path_visu=path_visu,
        show_debug_imgs=show_debug_imgs,
    )
    list_img_path = list(zip([None] * len(paths_img), paths_img))
    iterate = tl_expt.WrapExecuteSequence(
        _wrapper_segment,
        list_img_path,
        nb_workers=params['nb_workers'],
        desc='segmenting images',
    )
    for _ in iterate:
        gc.collect()
        time.sleep(1)

    logging.info('prediction DONE')
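

# Usage sketch: segment all images matching a glob pattern with a previously
# trained classifier; the paths and the override key below are hypothetical.
def _example_main_predict():
    main_predict('/tmp/experiment/classifier_RandForest.pkl',
                 '/tmp/data/images/*.png',
                 '/tmp/experiment/predictions',
                 params_local={'path_visu': '/tmp/experiment/visual'})

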
def retrain_lpo_segment_image(list_imgs_idx_path,
                              path_classif,
                              path_dump,
                              path_out,
                              path_visu,
                              show_debug_imgs=SHOW_DEBUG_IMAGES):
    """ load the classifier, and dumped data, subtract the image,
    retrain the classif without it and do the segmentation

    :param list(str) list_imgs_idx_path: path to input image
    :param str path_classif: path to saved classifier
    :param str path_dump: path to dumped data
    :param, str path_out: path to segmentation outputs
    :param bool show_debug_imgs: whether show debug images
    :return (str, ndarray, ndarray):
    """
    dict_imgs, _, _, dict_features, dict_labels, _, _ = load_dump_data(
        path_dump)
    dict_classif = seg_clf.load_classifier(path_classif)
    classif = dict_classif['clf_pipeline']
    params = dict_classif['params']

    for idx, path_img in list_imgs_idx_path:
        idx_name = get_idx_name(idx, path_img)
        _ = dict_features.pop(idx_name, None)
        _ = dict_labels.pop(idx_name, None)
    if (len(dict_imgs) - len(dict_features)) != len(list_imgs_idx_path):
        raise ValueError(
            'failed to drop %i images from the training set: %i remaining of %i total'
            % (len(list_imgs_idx_path), len(dict_features), len(dict_imgs)))

    features, labels, _ = seg_clf.convert_set_features_labels_2_dataset(
        dict_features,
        dict_labels,
        balance_type=params['balance'],
        drop_labels=[-1, np.nan] + params.get('drop_labels', []))
    classif.fit(features, labels)

    dict_segm, dict_segm_gc = {}, {}
    for imgs_idx_path in list_imgs_idx_path:
        idx_name, segm, segm_gc = segment_image(
            imgs_idx_path,
            params,
            classif,
            path_out,
            path_visu,
            show_debug_imgs=show_debug_imgs)
        dict_segm[idx_name] = segm
        dict_segm_gc[idx_name] = segm_gc
    gc.collect()
    time.sleep(1)
    return dict_segm, dict_segm_gc
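

# Usage sketch for the leave-P-out variant: drop two images from the training
# dump, retrain, and segment both; every path below is a hypothetical placeholder.
def _example_retrain_lpo_segment_image():
    list_imgs = [(0, '/tmp/experiment/images/img_00.png'),
                 (1, '/tmp/experiment/images/img_01.png')]
    return retrain_lpo_segment_image(
        list_imgs,
        path_classif='/tmp/experiment/classifier_RandForest.pkl',
        path_dump='/tmp/experiment/dump_data.npz',
        path_out='/tmp/experiment/segments',
        path_visu='/tmp/experiment/visual')

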
def main(params):
    """ PIPELINE for new detections

    :param {str: str} paths:
    """
    logging.info('running...')
    params = run_train.prepare_experiment_folder(params, FOLDER_EXPERIMENT)

    # run_train.check_pathes_patterns(paths)
    tl_expt.set_experiment_logger(params['path_expt'])
    logging.info('COMPUTER: \n%s', repr(os.uname()))
    logging.info(tl_expt.string_dict(params, desc='PARAMETERS'))

    tl_expt.create_subfolders(params['path_expt'], LIST_SUBFOLDER)

    path_csv = os.path.join(params['path_expt'], NAME_CSV_TRIPLES)
    df_paths = get_csv_triplets(params['path_list'],
                                path_csv,
                                params['path_images'],
                                params['path_segms'],
                                force_reload=FORCE_RERUN)

    dict_classif = seg_clf.load_classifier(params['path_classif'])
    params_clf = dict_classif['params']
    params_clf.update(params)
    logging.info(tl_expt.string_dict(params_clf, desc='UPDATED PARAMETERS'))

    # perform on new images
    df_stat = pd.DataFrame()
    wrapper_detection = partial(load_compute_detect_centers,
                                params=params_clf,
                                path_classif=params['path_classif'],
                                path_output=params['path_expt'])
    iterate = tl_expt.WrapExecuteSequence(wrapper_detection,
                                          df_paths.iterrows(),
                                          nb_workers=params['nb_jobs'],
                                          desc='detecting centers')
    for dict_center in iterate:
        df_stat = pd.concat([df_stat, pd.DataFrame([dict_center])],
                            ignore_index=True)
        # checkpoint the partial results after each processed image
        df_stat.to_csv(os.path.join(params['path_expt'],
                                    NAME_CSV_TRIPLES_TEMP))

    df_stat.set_index(['image'], inplace=True)
    df_stat.to_csv(os.path.join(params['path_expt'], NAME_CSV_TRIPLES))
    logging.info('STATISTIC: \n %s', repr(df_stat.describe()))

    logging.info('DONE')
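

# Usage sketch for the detection entry point; the parameter keys mirror the
# ones accessed in `main` above and all values are hypothetical placeholders.
def _example_main():
    params = {'path_list': '/tmp/data/list_triples.csv',
              'path_images': '/tmp/data/images/*.png',
              'path_segms': '/tmp/data/segms/*.png',
              'path_classif': '/tmp/experiment/classifier_RandForest.pkl',
              'path_output': '/tmp/results',
              'nb_jobs': 1}
    main(params)

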
def load_compute_detect_centers(idx_row,
                                params,
                                classif=None,
                                path_classif='',
                                path_output=''):
    """ complete pipeline fon input image and seg_pipe, such that load them,
    generate points, compute features and using given classifier predict labels

    :param (int, DF:row) idx_row:
    :param dict params:
    :param obj classif:
    :param str path_classif:
    :param str path_output:
    :return {str: float}:
    """
    _, row = idx_row
    dict_center = dict(row)

    if classif is None:
        dict_classif = seg_clf.load_classifier(path_classif)
        classif = dict_classif['clf_pipeline']

    try:
        path_show_in = os.path.join(path_output, FOLDER_INPUTS)
        name, img, segm, _ = run_train.load_image_segm_center(
            (None, row), path_show_in, params['dict_relabel'])
        t_start = time.time()
        _, slic, points, features, feature_names =\
            run_train.estim_points_compute_features(name, img, segm, params)
        dict_detect = run_train.detect_center_candidates(
            name, img, segm, None, slic, points, features, feature_names,
            params, path_output, classif)
        dict_detect['time elapsed'] = time.time() - t_start
        dict_center.update(dict_detect)

        dict_center = run_clust.cluster_points_draw_export(
            dict_center, params, path_output)
    except Exception:
        logging.exception('load_compute_detect_centers')
    gc.collect()
    time.sleep(1)
    return dict_center
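

# Usage sketch for per-image detection; the row columns and all paths are
# hypothetical placeholders for one record of the triples table.
def _example_load_compute_detect_centers():
    row = pd.Series({'path_image': '/tmp/data/images/img_00.png',
                     'path_segm': '/tmp/data/segms/img_00.png'})
    params = {'dict_relabel': None}
    return load_compute_detect_centers(
        (0, row), params,
        path_classif='/tmp/experiment/classifier_RandForest.pkl',
        path_output='/tmp/results')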