# --- Example 1 ---
def setup_pipeline(config):
    """Build a scikit-learn Pipeline and wrap it in a GridSearchCV.

    The ``optimisation`` section of *config* drives everything: the
    algorithm name is copied in from ``config.algorithm``, an optional
    PCA feature transform contributes ``pca__*`` grid parameters, and
    every hyperparameter becomes a ``<algorithm>__<name>`` grid entry.
    A ``kernel`` hyperparameter given as a dict is expanded into every
    combination of its parameter grid (each candidate gets a WhiteKernel
    term added).

    Parameters
    ----------
    config: configuration object exposing ``optimisation`` (dict),
        ``algorithm`` and ``n_jobs``.

    Returns
    -------
    GridSearchCV over the assembled pipeline.

    Raises
    ------
    ConfigException if the configured algorithm is unknown.
    """
    config.optimisation['algorithm'] = config.algorithm
    algorithm = config.optimisation['algorithm']
    if algorithm not in algos:
        raise ConfigException('optimisation algo must exist in algos dict')

    steps = []
    param_dict = {}

    if 'featuretransforms' in config.optimisation:
        config.featuretransform = config.optimisation['featuretransforms']
        if 'pca' in config.featuretransform:
            steps.append(('pca', pca))
            param_dict.update(
                ('pca__' + key, val)
                for key, val in config.featuretransform['pca'].items()
            )

    if 'hyperparameters' in config.optimisation:
        steps.append((algorithm, algos[algorithm]))
        for name, choices in config.optimisation['hyperparameters'].items():
            if name == 'target_transform':
                # Instantiate the named target transforms.
                choices = [transforms.transforms[t]() for t in choices]
            if name == 'kernel' and isinstance(choices, dict):
                # for scikitlearn kernels: expand each kernel spec into
                # every combination of its parameter grid.
                expanded = []
                for kernel_name, grid in choices.items():
                    grid = OrderedDict(grid)
                    for combo in product(*grid.values()):
                        params = dict(zip(grid.keys(), combo))
                        expanded.append(
                            kernels[kernel_name](**params) + WhiteKernel())
                choices = expanded

            param_dict[algorithm + '__' + name] = choices

    pipe = Pipeline(steps=steps)
    estimator = GridSearchCV(
        pipe,
        param_dict,
        n_jobs=config.n_jobs,
        iid=False,
        pre_dispatch='2*n_jobs',
        verbose=True,
        cv=5,
    )

    return estimator
# --- Example 2 ---
 def __init__(self,
              method='ordinary',
              variogram_model='linear',
              nlags=6,
              weight=False,
              n_closest_points=10,
              verbose=False):
     """Configure a kriging interpolator (model is trained later by fit).

     Parameters
     ----------
     method: str
         kriging variant; must be a key of ``krige_methods``.
     variogram_model: str
         variogram model name passed through to the backend.
     nlags: int
         number of averaging bins for the semivariogram.
     weight: bool
         whether to weight small lags more during variogram fitting.
     n_closest_points: int
         neighbourhood size used at prediction time.
     verbose: bool
         enable backend progress output.

     Raises
     ------
     ConfigException if *method* is not a known kriging method.
     """
     # Membership test on the dict directly; fixed misspelled error
     # message ('Kirging' -> 'Kriging').
     if method not in krige_methods:
         raise ConfigException('Kriging method must be '
                               'one of {}'.format(krige_methods.keys()))
     self.variogram_model = variogram_model
     self.verbose = verbose
     self.nlags = nlags
     self.weight = weight
     self.n_closest_points = n_closest_points
     self.model = None  # not trained
     self.method = method
# --- Example 3 ---
def resample_shapefile(config, outfile=None, validation_file=None,
                       validation_points=100):
    """Resample the target shapefile according to ``config.resample``.

    Each entry of ``config.resample`` maps a technique name ('value' or
    'spatial') to its arguments; techniques are applied in sequence, each
    step reading the previous step's output. Intermediate shapefiles are
    written to temp files and removed once consumed.

    Parameters
    ----------
    config: configuration object providing ``target_file``, ``resample``,
        ``output_dir`` and ``target_property``.
    outfile: optional explicit path for the final shapefile; a temp file
        in ``config.output_dir`` is used when omitted.
    validation_file: optional path for a validation shapefile, produced
        only by the final resampling step.
    validation_points: number of points reserved for validation.

    Returns
    -------
    str: path of the resampled shapefile, or the original ``target_file``
    when resampling is disabled.

    Raises
    ------
    ConfigException if a resampling technique name is unknown.
    """
    shapefile = config.target_file

    if not config.resample:
        return shapefile
    else:  # sample shapefile
        if not outfile:
            # NOTE(review): tempfile.mktemp is race-prone and deprecated;
            # consider mkstemp/NamedTemporaryFile — confirm nothing relies
            # on the file not yet existing.
            final_shpfile = tempfile.mktemp(suffix='.shp',
                                            dir=config.output_dir)
        else:
            final_shpfile = abspath(outfile)

        number_of_transforms = len(config.resample)

        for i, r in enumerate(config.resample):
            for k in r:
                if k not in resampling_techniques.keys():
                    raise ConfigException("Resampling must be 'value' or "
                                          "'spatial'")

                # Last transform writes straight to the final path; the
                # earlier ones write to throwaway temp files.
                int_shpfile = final_shpfile if i == number_of_transforms -1 \
                    else tempfile.mktemp(suffix='.shp', dir=config.output_dir)

                # Chain the steps: first reads the source shapefile, later
                # ones read the previous iteration's output (out_shpfile
                # carries over from the prior loop pass).
                input_shpfile = shapefile if i == 0 else out_shpfile

                # just create the validation shape during last sampling step
                validation = validation_file \
                    if i == number_of_transforms-1 else None

                out_shpfile = resampling_techniques[k](
                    input_shpfile, int_shpfile,
                    target_field=config.target_property,
                    validation_file=validation,
                    validation_points=validation_points,
                    ** r[k]['arguments']
                    )
                # Drop the intermediate shapefile set this step consumed
                # (never the original source, hence i > 0).
                if i > 0:
                    _remove_files(splitext(input_shpfile)[0],
                                  ['.shp', '.shx', '.prj', '.dbf', '.cpg'])

        log.info('Output shapefile is {}'.format(out_shpfile))
        return out_shpfile
# --- Example 4 ---
    def fit(self, x, y, *args, **kwargs):
        """Train the kriging backend on 2-D point locations.

        Parameters
        ----------
        x: ndarray
            array of Points, (x, y) pairs
        y: ndarray
            array of targets

        Raises
        ------
        ConfigException when *x* does not have exactly two columns.
        """
        # Kriging here works on planar coordinates only.
        if x.shape[1] != 2:
            raise ConfigException('krige can use only 2 covariates')

        # Collect backend arguments, then instantiate the configured
        # kriging method; the trained backend is kept on self.model.
        backend_kwargs = dict(
            x=x[:, 0],
            y=x[:, 1],
            z=y,
            variogram_model=self.variogram_model,
            verbose=self.verbose,
            nlags=self.nlags,
            weight=self.weight,
        )
        self.model = krige_methods[self.method](**backend_kwargs)
# --- Example 5 ---
def setup_pipeline(config):
    """Assemble a Pipeline and a non-refitting GridSearchCV over it.

    Reads ``config.optimisation`` for the algorithm name, an optional
    PCA feature transform, optional named scorers and the hyperparameter
    grid. A ``kernel`` hyperparameter given as a dict is expanded into
    every combination of its parameter grid, each candidate kernel
    getting a WhiteKernel term added.

    Parameters
    ----------
    config: configuration object exposing ``optimisation`` (dict) and
        ``n_jobs``.

    Returns
    -------
    tuple of (GridSearchCV estimator, scoring dict or None).

    Raises
    ------
    ConfigException if the configured algorithm is unknown.
    """
    algorithm = config.optimisation['algorithm']
    if algorithm not in algos:
        # Fixed misspelled error message ('avilable' -> 'available').
        raise ConfigException(
            'Optimisation algorithm must exist in available algorithms: {}'.
            format(list(algos.keys())))

    steps = []
    param_dict = {}

    if 'featuretransforms' in config.optimisation:
        config.featuretransform = config.optimisation['featuretransforms']
        if 'pca' in config.featuretransform:
            steps.append(('pca', pca))
            for k, v in config.featuretransform['pca'].items():
                param_dict['pca__' + k] = v

    # Resolve each requested scorer name against the known scorer maps;
    # unknown names are warned about and skipped. scoring stays None when
    # no scorers were configured (or none resolved).
    scoring = None
    if 'scorers' in config.optimisation:
        scorer_maps = [
            regression_predict_scorers, classification_proba_scorers,
            classification_predict_scorers
        ]
        scoring = {}
        for s in config.optimisation['scorers']:
            f = None  # explicit reset so a miss can never reuse a stale hit
            for sm in scorer_maps:
                f = sm.get(s)
                if f is not None:
                    break
            if f is None:
                _logger.warning(f"Scorer '{s}' not found!")
            else:
                scoring[s] = f
        if not scoring:
            scoring = None

    if 'hyperparameters' in config.optimisation:
        steps.append((algorithm, algos[algorithm]))
        for name, value in config.optimisation['hyperparameters'].items():
            if name == 'target_transform':
                # Instantiate the named target transforms.
                value = [transforms.transforms[t]() for t in value]
            if name == 'kernel' and isinstance(value, dict):
                # for scikitlearn kernels: expand each kernel spec into
                # every combination of its parameter grid.
                expanded = []
                for kernel_name, grid in value.items():
                    grid = OrderedDict(grid)
                    for combo in product(*grid.values()):
                        params = dict(zip(grid.keys(), combo))
                        expanded.append(
                            kernels[kernel_name](**params) + WhiteKernel())
                value = expanded

            param_dict[algorithm + '__' + name] = value

    pipe = Pipeline(steps=steps)

    # NOTE(review): the 'iid' parameter was removed from GridSearchCV in
    # scikit-learn 0.24 — this assumes an older pinned sklearn; confirm.
    estimator = GridSearchCV(
        pipe,
        param_dict,
        n_jobs=config.n_jobs,
        iid=False,
        scoring=scoring,
        refit=False,
        pre_dispatch='2*n_jobs',
        verbose=True,
        cv=5,
    )

    return estimator, scoring