コード例 #1
0
def save_intersected_features(feature_sets, transform_sets, config):
    """
    Save raw covariate values at the target locations, i.e. after the
    targets have been intersected.

    This will save the following two files if they are provided in the
    config file:
        a) rawcovariates.csv: the covariate values in csv
        b) rawcovariates_mask.csv: the corresponding mask in csv
    This function will also optionally output one scatter plot per
    intersected covariate.
    """
    # One column name per band of every image in every feature set.
    names = [
        '{}_{}'.format(b, basename(k)) for ec in feature_sets for k in ec
        for b in range(ec[k].shape[3])
    ]
    header = ', '.join(names)

    # Identity transforms (no image/global transforms, no imputation) so
    # the *raw* values are written out; only the categorical flag is kept.
    transform_sets_mod = []
    for t in transform_sets:
        dummy_transform = transforms.ImageTransformSet(
            image_transforms=None,
            imputer=None,
            global_transforms=None,
            is_categorical=t.is_categorical)
        transform_sets_mod.append(dummy_transform)

    transformed_vectors = [
        t(c) for c, t in zip(feature_sets, transform_sets_mod)
    ]

    x = np.ma.concatenate(transformed_vectors, axis=1)
    x_all = gather_features(x, node=0)
    if mpiops.chunk_index == 0:  # only the root MPI rank writes output
        np.savetxt(config.rawcovariates,
                   X=x_all.data,
                   delimiter=',',
                   fmt='%.4e',
                   header=header)
        np.savetxt(config.rawcovariates_mask,
                   X=x_all.mask.astype(int),
                   delimiter=',',
                   fmt='%.4e',
                   header=header)
        if config.plot_covariates:
            import matplotlib.pyplot as plt
            for i, name in enumerate(names):
                log.info('plotting {}'.format(name))
                plt.figure()
                vals = x_all[:, i]
                # vals_no_mask is already the plain data ndarray; a further
                # `.data` would yield a memoryview, not an array.
                vals_no_mask = vals[~vals.mask].data
                plt.scatter(x=list(range(vals_no_mask.shape[0])),
                            y=vals_no_mask)
                plt.title(name)
                # str.rstrip strips a *character set*, so rstrip('.tif')
                # would also eat trailing 't'/'i'/'f' characters from the
                # stem (e.g. '0_fit.tif' -> '0_'); strip the extension
                # explicitly instead.
                stem = name[:-len('.tif')] if name.endswith('.tif') else name
                plt.savefig(stem + '.png')
                plt.close()
コード例 #2
0
ファイル: config.py プロジェクト: jbw900/uncover-ml
    def __init__(self, config_dict: dict):
        """
        Build a feature set definition from its config dictionary.

        Sets ``self.type`` ('ordinal' or 'categorical', falling back to
        'ordinal' for unknown values), collects the covariate sources
        (raster files or shapefile fields) and constructs the feature
        set's :class:`transforms.ImageTransformSet`.

        Raises
        ------
        ValueError
            If the config contains neither a 'files' nor a 'shapefile'
            section.
        """
        d = config_dict
        if d['type'] not in ('ordinal', 'categorical'):
            # Typo fix: message previously read "Unknwon".
            _logger.warning(
                "Feature set type must be ordinal or categorical. "
                "Unknown option: '%s'. Type has been set to 'ordinal'.",
                d['type'])
            self.type = 'ordinal'
        else:
            self.type = d['type']
        is_categorical = d['type'] == 'categorical'

        # get list of all the files
        if 'files' in d:
            self.tabular = False
            files = []
            for source in d['files']:
                key = next(iter(source.keys()))
                if key == 'path':
                    files.append(path.abspath(source[key]))
                elif key == 'directory':
                    glob_string = path.join(path.abspath(source[key]), "*.tif")
                    files.extend(glob.glob(glob_string))
                elif key == 'list':
                    # A text/CSV file listing one tif path per line;
                    # blank lines and '#' comment lines are skipped.
                    csvfile = path.abspath(source[key])
                    with open(csvfile, 'r') as f:
                        reader = csv.reader(f)
                        tifs = list(reader)
                        tifs = [
                            f[0].strip() for f in tifs
                            if (len(f) > 0 and f[0].strip()
                                and f[0].strip()[0] != '#')
                        ]
                    for f in tifs:
                        files.append(path.abspath(f))

            self.files = sorted(files, key=str.lower)
            n_feat = len(self.files)
            # Bug fix: the 'f' prefix was missing, so {self.files} was
            # logged literally instead of being interpolated.
            _logger.debug(f"Loaded feature set with files: {self.files}")
        elif 'shapefile' in d:
            self.tabular = True
            self.fields = sorted(d['shapefile']['fields'], key=str.lower)
            n_feat = len(self.fields)
            self.file = d['shapefile']['file']
            self.ndv = d['shapefile'].get('ndv', None)
            _logger.debug(f"Loaded feature set with fields: {self.fields}")
        else:
            # Previously this fell through and raised a NameError on
            # n_feat below; fail with an explicit message instead.
            raise ValueError(
                "Feature set config must contain 'files' or 'shapefile'")

        trans_i, im, trans_g = _parse_transform_set(d['transforms'],
                                                    d['imputation'], n_feat)

        self.transform_set = transforms.ImageTransformSet(
            trans_i, im, trans_g, is_categorical)
コード例 #3
0
ファイル: features.py プロジェクト: jesserobertson/uncover-ml
def cull_all_null_rows(feature_sets):
    """Return a boolean row mask that keeps targets with at least one
    unmasked covariate value.

    A row where *every* covariate is masked (all null) maps to False.
    """
    # Identity transform: no image/global transforms or imputation, so the
    # masks reflect the raw intersected values.
    identity = transforms.ImageTransformSet(image_transforms=None,
                                            imputer=None,
                                            global_transforms=None,
                                            is_categorical=True)
    masks = [identity(fs).mask for fs in feature_sets]
    all_masks = np.concatenate(masks, axis=1)
    n_covariates = all_masks.shape[1]
    # Keep a row unless every one of its covariate entries is masked.
    return np.sum(all_masks, axis=1) != n_covariates
コード例 #4
0
ファイル: config.py プロジェクト: basaks/uncover-ml
    def __init__(self, d):
        """Construct a feature set from its config dictionary ``d``.

        Collects the covariate file paths from 'path', 'directory' and
        'list' sources, sorts them case-insensitively, and builds the
        feature set's :class:`transforms.ImageTransformSet`.
        """
        self.name = d['name']
        self.type = d['type']
        if d['type'] not in {'ordinal', 'categorical'}:
            log.warning("Feature set type must be ordinal or categorical: "
                        "Unknown option "
                        "{} (assuming ordinal)".format(d['type']))
        is_categorical = d['type'] == 'categorical'

        # Gather every covariate file referenced by this feature set.
        files = []
        for source in d['files']:
            key = next(iter(source))
            value = source[key]
            if key == 'path':
                files.append(path.abspath(value))
            elif key == 'directory':
                pattern = path.join(path.abspath(value), "*.tif")
                files.extend(glob.glob(pattern))
            elif key == 'list':
                # A CSV listing one tif path per line; skip blank lines
                # and '#' comment lines.
                with open(path.abspath(value), 'r') as f:
                    rows = list(csv.reader(f))
                tifs = [r[0].strip() for r in rows
                        if (r and r[0].strip()
                            and not r[0].strip().startswith('#'))]
                files.extend(path.abspath(t) for t in tifs)

        self.files = sorted(files, key=str.lower)

        # Default the optional sections so downstream parsing always
        # finds the keys (mutates d, as the original did).
        d.setdefault('transforms', None)
        d.setdefault('imputation', None)

        trans_i, im, trans_g = _parse_transform_set(d['transforms'],
                                                    d['imputation'],
                                                    len(self.files))
        self.transform_set = transforms.ImageTransformSet(trans_i, im, trans_g,
                                                          is_categorical)
コード例 #5
0
def save_intersected_features_and_targets(feature_sets,
                                          transform_sets,
                                          targets,
                                          config,
                                          impute=True):
    """
    This function saves a table of covariate values and the target
    value intersected at each point. It also contains columns for
    UID 'index' and a predicted value.

    If the target shapefile contains an 'index' field, this will be
    used to populate the 'index' column. This is intended to be used
    as a unique ID for each point in post-processing. If no 'index'
    field exists this column will be zero filled.

    The 'prediction' column is for predicted values created during
    cross-validation. Again, this is for post-processing. It will only
    be populated if cross-validation is run later on. If not, it will
    be zero filled.

    Two files will be output:
        .../output_dir/{name_of_config}_rawcovariates.csv
        .../output_dir/{name_of_config}_rawcovariates_mask.csv

    This function will also optionally output intersected covariates scatter
    plot and covariate correlation matrix plot.
    """
    # Fail fast if any requested extra CSV column is missing from the
    # target shapefile records.
    if config.fields_to_write_to_csv:
        for f in config.fields_to_write_to_csv:
            if f not in targets.fields:
                raise ValueError(
                    f"write_to_csv field '{f}' does not exist in shapefile records"
                )

    transform_sets_mod = []
    # Column names: one per feature-set key, then any extra shapefile
    # fields, then X/Y coordinates, the target and the prediction column.
    cov_names = []
    for fs in feature_sets:
        cov_names.extend(fs.keys())
    other_names = ['X', 'Y', 'target', 'prediction']

    if config.fields_to_write_to_csv:
        other_names = config.fields_to_write_to_csv + other_names

    header = ','.join(cov_names + other_names)
    # The mask file only covers the covariate columns.
    mask_header = ','.join(cov_names)

    for t in transform_sets:
        # Keep only (optional) imputation and the categorical flag so the
        # values written out are the raw covariates.
        imputer = copy.deepcopy(t.imputer) if impute else None
        dummy_transform = transforms.ImageTransformSet(
            image_transforms=None,
            imputer=imputer,
            global_transforms=None,
            is_categorical=t.is_categorical)
        transform_sets_mod.append(dummy_transform)

    transformed_vectors = [
        t(c) for c, t in zip(feature_sets, transform_sets_mod)
    ]

    # Collect all ranks' covariates, positions and observations on rank 0.
    x = np.ma.concatenate(transformed_vectors, axis=1)
    x_all = gather_features(x, node=0)
    all_xy = mpiops.comm.gather(targets.positions, root=0)
    all_targets = mpiops.comm.gather(targets.observations, root=0)

    if config.fields_to_write_to_csv:
        if config.target_search:
            raise NotImplementedError(
                "Can't write 'write_to_csv' columns with target search feature at this time."
            )
        field_values = []
        for f in config.fields_to_write_to_csv:
            field_values.append(mpiops.comm.gather(targets.fields[f]))

    if mpiops.chunk_index == 0:  # only the root rank writes output
        data = [x_all.data]
        if config.fields_to_write_to_csv:
            for f, v in zip(config.fields_to_write_to_csv, field_values):
                data.append(np.atleast_2d(np.ma.concatenate(v, axis=0)).T)
        all_xy = np.ma.concatenate(all_xy, axis=0)
        all_targets = np.ma.concatenate(all_targets, axis=0)
        xy = np.atleast_2d(all_xy)
        t = np.atleast_2d(all_targets).T
        data += [xy, t]
        # Zeros for prediction values
        data.append(np.zeros(t.shape))
        data = np.hstack(data)
        np.savetxt(config.raw_covariates,
                   X=data,
                   fmt='%s',
                   delimiter=',',
                   header=header,
                   comments='')

        # NOTE(review): this writes the *inverted* mask (1 = valid value,
        # 0 = masked) despite the "mask" filename — confirm downstream
        # consumers expect the inverted convention before changing it.
        np.savetxt(config.raw_covariates_mask,
                   X=~x_all.mask.astype(bool),
                   fmt='%f',
                   delimiter=',',
                   header=mask_header,
                   comments='')

        if config.plot_intersection:
            diagnostics.plot_covariates_x_targets(config.raw_covariates,
                                                  cols=2).savefig(
                                                      config.plot_intersection)

        if config.plot_correlation:
            diagnostics.plot_covariate_correlation(
                config.raw_covariates).savefig(config.plot_correlation)
コード例 #6
0
def save_intersected_features_and_targets(feature_sets,
                                          transform_sets,
                                          targets,
                                          config,
                                          impute=True):
    """
    Save raw covariate values at the target locations, i.e. after the
    targets have been intersected.

    This will save the following two files if they are provided in the
    config file:
        a) rawcovariates.csv: the covariate values in csv
        b) rawcovariates_mask.csv: the corresponding mask in csv
    This function will also optionally output intersected covariates scatter
    plot and covariate correlation plots.
    """
    transform_sets_mod = []
    # One column name per band of every image, plus coordinates and target.
    names = [
        '{}_{}'.format(b, basename(k)) for ec in feature_sets for k in ec
        for b in range(ec[k].shape[3])
    ]

    names += ["X", "Y", config.target_property + "(target)"]
    header = ', '.join(names)

    for t in transform_sets:
        # Keep only (optional) imputation and the categorical flag so the
        # values written out are the raw covariates.
        imputer = copy.deepcopy(t.imputer) if impute else None
        dummy_transform = transforms.ImageTransformSet(
            image_transforms=None,
            imputer=imputer,
            global_transforms=None,
            is_categorical=t.is_categorical)
        transform_sets_mod.append(dummy_transform)

    transformed_vectors = [
        t(c) for c, t in zip(feature_sets, transform_sets_mod)
    ]

    # Collect all ranks' covariates, positions and observations on rank 0.
    x = np.ma.concatenate(transformed_vectors, axis=1)
    x_all = gather_features(x, node=0)

    all_xy = mpiops.comm.gather(targets.positions, root=0)
    all_targets = mpiops.comm.gather(targets.observations, root=0)

    if mpiops.chunk_index == 0:  # only the root rank writes output
        all_xy = np.ma.concatenate(all_xy, axis=0)
        all_targets = np.ma.concatenate(all_targets, axis=0)
        xy = np.atleast_2d(all_xy)
        t = np.atleast_2d(all_targets).T
        data = np.hstack((x_all.data, xy, t))
        np.savetxt(config.raw_covariates,
                   X=data,
                   delimiter=',',
                   header=header,
                   comments='')
        # The mask file shares the covariate header, so pad it with zero
        # ("not masked") columns for X and Y as well as the target.
        # Previously only the target column was padded, leaving the mask
        # table two columns short of its own header.
        mask = np.hstack((x_all.mask.astype(int),
                          np.zeros_like(xy),
                          np.zeros_like(t)))
        np.savetxt(config.raw_covariates_mask,
                   X=mask,
                   delimiter=',',
                   header=header,
                   comments='')

        if config.plot_intersection:
            diagnostics.plot_covariates_x_targets(config.raw_covariates,
                                                  cols=2).savefig(
                                                      config.plot_intersection)

        if config.plot_correlation:
            diagnostics.plot_covariate_correlation(
                config.raw_covariates).savefig(config.plot_correlation)