Example #1
def main(params):
    """ PIPELINE candidate clustering

    :param {str: any} params:
    """
    params['path_expt'] = os.path.join(params['path_output'], FOLDER_EXPERIMENT % params['name'])
    tl_expt.save_config_yaml(os.path.join(params['path_expt'], NAME_YAML_PARAMS), params)
    tl_expt.create_subfolders(params['path_expt'], LIST_SUBDIRS)

    list_paths = [params[k] for k in ['path_images', 'path_segms', 'path_centers']]
    df_paths = tl_data.find_files_match_names_across_dirs(list_paths)
    df_paths.columns = ['path_image', 'path_segm', 'path_points']
    df_paths.index = range(1, len(df_paths) + 1)
    path_cover = os.path.join(params['path_expt'], run_train.NAME_CSV_TRIPLES)
    df_paths.to_csv(path_cover)

    logging.info('run clustering...')
    _wrapper_clustering = partial(cluster_points_draw_export, params=params, path_out=params['path_expt'])
    rows = (dict(row) for _, row in df_paths.iterrows())
    iterate = tl_expt.WrapExecuteSequence(_wrapper_clustering, rows, nb_workers=params['nb_workers'])
    # collect the per-image result dicts at once; DataFrame.append was removed in pandas 2.0
    df_paths_new = pd.DataFrame(list(iterate))

    df_paths_new.set_index('image', inplace=True)
    df_paths_new.to_csv(path_cover)
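
A minimal invocation sketch for this clustering pipeline; every key below is read somewhere in the function body, but all concrete values and paths are placeholders rather than part of the original example:

# hypothetical parameters -- all values are placeholders
params = {
    'name': 'clustering-demo',             # experiment name, substituted into FOLDER_EXPERIMENT
    'path_output': '/tmp/experiments',     # root folder for experiment outputs
    'path_images': '/data/images/*.png',   # input patterns matched across directories
    'path_segms': '/data/segms/*.png',
    'path_centers': '/data/centers/*.csv',
    'nb_workers': 4,                       # parallel workers for the clustering step
}
main(params)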
Example #2
def view_func_params(frame=inspect.currentframe(), path_out=''):
    """ view function parameters

    :param frame:
    :param str path_out:
    :return dict:

    >>> view_func_params()  # doctest: +ELLIPSIS
    {...}
    """
    _, _, _, values = inspect.getargvalues(frame)
    logging.info('PARAMETERS: \n%s',
                 '\n'.join('"{}": \t {}'.format(k, values[k]) for k in values))
    if os.path.exists(path_out):
        save_config_yaml(os.path.join(path_out, NAME_CONFIG), values)
    return values
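
A short usage sketch: calling it from inside another function logs that function's arguments; `demo` is hypothetical, and the default `path_out=''` skips the YAML export:

# hypothetical usage -- the logged values are demo's own arguments
def demo(alpha=0.5, name='demo'):
    return view_func_params(inspect.currentframe())

demo()
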
def main_train(params):
    """ PIPELINE for training
    0) load triplets or create triplets from path to images, annotations
    1) load precomputed data or compute them now
    2) train classifier with hyper-parameters
    3) perform Leave-One-Out experiment

    :param {str: any} params:
    """
    params = prepare_experiment_folder(params, FOLDER_EXPERIMENT)

    tl_expt.set_experiment_logger(params['path_expt'])
    logging.info(tl_expt.string_dict(params, desc='PARAMETERS'))
    tl_expt.save_config_yaml(
        os.path.join(params['path_expt'], NAME_YAML_PARAMS), params)
    tl_expt.create_subfolders(params['path_expt'], LIST_SUBDIRS)

    df_paths, _ = load_df_paths(params)

    path_dump_data = os.path.join(params['path_expt'], NAME_DUMP_TRAIN_DATA)
    if not os.path.isfile(path_dump_data) or FORCE_RECOMP_DATA:
        (dict_imgs, dict_segms, dict_slics, dict_points, dict_centers,
         dict_features, dict_labels, feature_names) = \
            dataset_load_images_segms_compute_features(params, df_paths, params['nb_workers'])
        assert len(dict_imgs) > 0, 'missing images'
        save_dump_data(
            path_dump_data,
            dict_imgs,
            dict_segms,
            dict_slics,
            dict_points,
            dict_centers,
            dict_features,
            dict_labels,
            feature_names,
        )
    else:
        (dict_imgs, dict_segms, dict_slics, dict_points, dict_centers,
         dict_features, dict_labels,
         feature_names) = load_dump_data(path_dump_data)

    if is_drawing(params['path_expt']) and EXPORT_TRAINING_DATA:
        export_dataset_visual(params['path_expt'], dict_imgs, dict_segms,
                              dict_slics, dict_points, dict_labels,
                              params['nb_workers'])

    # concatenate features and labels across all images
    features, labels, sizes = seg_clf.convert_set_features_labels_2_dataset(
        dict_features,
        dict_labels,
        drop_labels=[-1],
        balance_type=params['balance'])
    # replace invalid values (NaN / inf) in the feature space
    features[np.isnan(features)] = 0
    features[np.isinf(features)] = -1
    assert np.sum(sizes) == len(labels), \
        'the number of samples (%d) does not match the number of labels (%d)' \
        % (int(np.sum(sizes)), len(labels))

    # feature norm & train classification
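    # CROSS_VAL_LEAVE_OUT_SEARCH is assumed to be a fraction in (0, 1); whole image
    # groups (defined by `sizes`) are held out together, so samples from one image
    # never appear in both the training and the validation fold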
    nb_holdout = int(np.ceil(len(sizes) * CROSS_VAL_LEAVE_OUT_SEARCH))
    cv = seg_clf.CrossValidateGroups(sizes, nb_holdout)
    classif, params['path_classif'] = seg_clf.create_classif_search_train_export(
            params['classif'],
            features,
            labels,
            cross_val=cv,
            params=params,
            feature_names=feature_names,
            nb_search_iter=params['nb_classif_search'],
            pca_coef=params.get('pca_coef', None),
            nb_workers=params['nb_workers'],
            path_out=params['path_expt'],
        )
    nb_holdout = int(np.ceil(len(sizes) * CROSS_VAL_LEAVE_OUT_EVAL))
    cv = seg_clf.CrossValidateGroups(sizes, nb_holdout)
    seg_clf.eval_classif_cross_val_scores(params['classif'],
                                          classif,
                                          features,
                                          labels,
                                          cross_val=cv,
                                          path_out=params['path_expt'])
    seg_clf.eval_classif_cross_val_roc(params['classif'],
                                       classif,
                                       features,
                                       labels,
                                       cross_val=cv,
                                       path_out=params['path_expt'])

    if RUN_LEAVE_ONE_OUT:
        experiment_loo(classif, dict_imgs, dict_segms, dict_centers,
                       dict_slics, dict_points, dict_features, feature_names,
                       params)
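
A sketch of driving the whole training pipeline; the keys mirror those read in the function body, while every value (and the path setup consumed by `prepare_experiment_folder` and `load_df_paths`) is a placeholder assumption:

# hypothetical parameters -- keys taken from the function body, values are placeholders
params = {
    'classif': 'RandForest',      # assumed classifier name understood by seg_clf
    'balance': 'unique',          # assumed label-balancing strategy
    'nb_classif_search': 10,      # iterations of the hyper-parameter search
    'pca_coef': None,             # skip PCA reduction
    'nb_workers': 4,
    # ... plus the input/output paths consumed by prepare_experiment_folder / load_df_paths
}
main_train(params)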