def setup_pipeline(config):
    """Build a ``GridSearchCV`` estimator from ``config.optimisation``.

    NOTE(review): ``setup_pipeline`` is defined a second time later in this
    file; that later definition shadows this one at import time, so this
    version looks like dead code — confirm before modifying or relying on it.

    Parameters
    ----------
    config : object
        Configuration whose ``optimisation`` mapping selects the algorithm,
        optional PCA feature transforms, and the hyperparameter grid.

    Returns
    -------
    GridSearchCV
        Grid-search estimator over the assembled pipeline.

    Raises
    ------
    ConfigException
        If the requested algorithm is not a key of ``algos``.
    """
    # Unlike the later variant, this version copies the top-level algorithm
    # choice into the optimisation section before validating it.
    config.optimisation['algorithm'] = config.algorithm
    if config.optimisation['algorithm'] not in algos:
        raise ConfigException('optimisation algo must exist in algos dict')
    steps = []
    param_dict = {}
    if 'featuretransforms' in config.optimisation:
        config.featuretransform = config.optimisation['featuretransforms']
        if 'pca' in config.featuretransform:
            steps.append(('pca', pca))
            # PCA hyperparameters are namespaced for the Pipeline step name.
            for k, v in config.featuretransform['pca'].items():
                param_dict['pca__' + k] = v
    if 'hyperparameters' in config.optimisation:
        steps.append((config.optimisation['algorithm'],
                      algos[config.optimisation['algorithm']]))
        for k, v in config.optimisation['hyperparameters'].items():
            if k == 'target_transform':
                # Named target transforms are instantiated here.
                v = [transforms.transforms[vv]() for vv in v]
            if k == 'kernel':
                # for scikitlearn kernels
                # A dict value maps kernel name -> {param: [values...]}; the
                # cartesian product of the parameter lists is expanded into
                # concrete kernel objects, each with an added WhiteKernel.
                if isinstance(v, dict):
                    V = []
                    for kk, value in v.items():
                        value = OrderedDict(value)
                        values = [v for v in value.values()]
                        prod = product(*values)
                        keys = value.keys()
                        combinations = []
                        for p in prod:
                            d = {}
                            for kkk, pp in zip(keys, p):
                                d[kkk] = pp
                            combinations.append(d)
                        V += [kernels[kk](**c) + WhiteKernel()
                              for c in combinations]
                    v = V
            # Namespace each hyperparameter for the algorithm pipeline step.
            param_dict[config.optimisation['algorithm'] + '__' + k] = v
    pipe = Pipeline(steps=steps)
    # NOTE(review): the ``iid`` keyword was removed in scikit-learn 0.24;
    # this call will fail on modern versions — confirm the pinned version.
    estimator = GridSearchCV(
        pipe,
        param_dict,
        n_jobs=config.n_jobs,
        iid=False,
        pre_dispatch='2*n_jobs',
        verbose=True,
        cv=5,
    )
    return estimator
def __init__(self, method='ordinary', variogram_model='linear', nlags=6,
             weight=False, n_closest_points=10, verbose=False):
    """Configure a (not yet trained) kriging interpolator.

    All arguments are stored on the instance for later use by ``fit``;
    their domain semantics are those of the underlying kriging class in
    ``krige_methods`` — confirm against that implementation.

    Parameters
    ----------
    method : str
        Kriging method; must be a key of ``krige_methods``.
    variogram_model : str
        Variogram model name passed through to the kriging class.
    nlags : int
        Passed through to the kriging class.
    weight : bool
        Passed through to the kriging class.
    n_closest_points : int
        Stored on the instance; presumably used for moving-window
        kriging — verify against the prediction code.
    verbose : bool
        Verbosity flag passed through to the kriging class.

    Raises
    ------
    ConfigException
        If ``method`` is not a supported kriging method.
    """
    if method not in krige_methods:
        # Bug fix: the message previously read "Kirging".
        raise ConfigException('Kriging method must be '
                              'one of {}'.format(krige_methods.keys()))
    self.variogram_model = variogram_model
    self.verbose = verbose
    self.nlags = nlags
    self.weight = weight
    self.n_closest_points = n_closest_points
    self.model = None  # not trained
    self.method = method
def resample_shapefile(config, outfile=None, validation_file=None,
                       validation_points=100):
    """Apply the configured chain of resampling steps to the target shapefile.

    Each entry of ``config.resample`` is applied in order; every step reads
    the previous step's output shapefile and writes a new one, with
    intermediate files deleted as the chain advances. When resampling is not
    configured, the original target file path is returned unchanged.

    Parameters
    ----------
    config : object
        Must provide ``target_file``, ``resample``, ``output_dir`` and
        ``target_property``.
    outfile : str, optional
        Path for the final resampled shapefile; a temporary file inside
        ``config.output_dir`` is used when omitted.
    validation_file : str, optional
        Forwarded to the resampling technique on the LAST step only.
    validation_points : int
        Forwarded alongside ``validation_file``.

    Returns
    -------
    str
        Path of the final (resampled) shapefile.

    Raises
    ------
    ConfigException
        If a resampling key is not one of ``resampling_techniques``.
    """
    shapefile = config.target_file
    if not config.resample:
        return shapefile
    else:
        # sample shapefile
        # NOTE(review): tempfile.mktemp is deprecated and race-prone; the
        # name may be claimed by another process before it is used.
        if not outfile:
            final_shpfile = tempfile.mktemp(suffix='.shp',
                                            dir=config.output_dir)
        else:
            final_shpfile = abspath(outfile)
        number_of_transforms = len(config.resample)
        for i, r in enumerate(config.resample):
            for k in r:
                if k not in resampling_techniques.keys():
                    raise ConfigException("Resampling must be 'value' or "
                                          "'spatial'")
                # Last step writes to the final destination; earlier steps
                # write to fresh temporary shapefiles.
                int_shpfile = final_shpfile if i == number_of_transforms - 1 \
                    else tempfile.mktemp(suffix='.shp', dir=config.output_dir)
                # First step reads the original target file; later steps read
                # the previous iteration's output.
                input_shpfile = shapefile if i == 0 else out_shpfile
                # just create the validation shape during last sampling step
                validation = validation_file \
                    if i == number_of_transforms - 1 else None
                out_shpfile = resampling_techniques[k](
                    input_shpfile, int_shpfile,
                    target_field=config.target_property,
                    validation_file=validation,
                    validation_points=validation_points,
                    ** r[k]['arguments']
                )
                # Clean up the intermediate shapefile (and its sidecar files)
                # from the previous step; never delete the original input.
                if i > 0:
                    _remove_files(splitext(input_shpfile)[0],
                                  ['.shp', '.shx', '.prj', '.dbf', '.cpg'])
        log.info('Output shapefile is {}'.format(out_shpfile))
        return out_shpfile
def fit(self, x, y, *args, **kwargs):
    """Train the kriging model on 2-D point covariates.

    Parameters
    ----------
    x: ndarray
        array of Points, (x, y) pairs
    y: ndarray
        array of targets

    Raises
    ------
    ConfigException
        If ``x`` has more or fewer than exactly two covariate columns.
    """
    n_covariates = x.shape[1]
    if n_covariates != 2:
        raise ConfigException('krige can use only 2 covariates')
    eastings = x[:, 0]
    northings = x[:, 1]
    kriging_cls = krige_methods[self.method]
    self.model = kriging_cls(
        x=eastings,
        y=northings,
        z=y,
        variogram_model=self.variogram_model,
        verbose=self.verbose,
        nlags=self.nlags,
        weight=self.weight,
    )
def setup_pipeline(config):
    """Construct a ``GridSearchCV`` estimator from ``config.optimisation``.

    Parameters
    ----------
    config : object
        Configuration whose ``optimisation`` mapping selects the algorithm,
        optional PCA feature transforms, scorers and the hyperparameter grid.

    Returns
    -------
    tuple
        ``(estimator, scoring)`` where ``estimator`` is the configured
        ``GridSearchCV`` and ``scoring`` is the scorer mapping handed to it
        (``None`` when no valid scorers were configured).

    Raises
    ------
    ConfigException
        If the requested algorithm is not a key of ``algos``.
    """
    if config.optimisation['algorithm'] not in algos:
        # Bug fix: the message previously read "avilable".
        raise ConfigException(
            'Optimisation algorithm must exist in available algorithms: {}'
            .format(list(algos.keys())))
    steps = []
    param_dict = {}
    if 'featuretransforms' in config.optimisation:
        config.featuretransform = config.optimisation['featuretransforms']
        if 'pca' in config.featuretransform:
            steps.append(('pca', pca))
            # PCA hyperparameters are namespaced for the Pipeline step name.
            for k, v in config.featuretransform['pca'].items():
                param_dict['pca__' + k] = v
    if 'scorers' in config.optimisation:
        scorers = config.optimisation['scorers']
        scorer_maps = [regression_predict_scorers,
                       classification_proba_scorers,
                       classification_predict_scorers]
        scoring = {}
        for s in scorers:
            # Reset per scorer so a hit from a previous name cannot leak
            # into this iteration's lookup.
            f = None
            for sm in scorer_maps:
                f = sm.get(s)
                if f is not None:
                    break
            if f is None:
                _logger.warning(f"Scorer '{s}' not found!")
            else:
                scoring[s] = f
        if not scoring:
            scoring = None
    else:
        scoring = None
    if 'hyperparameters' in config.optimisation:
        steps.append((config.optimisation['algorithm'],
                      algos[config.optimisation['algorithm']]))
        for k, v in config.optimisation['hyperparameters'].items():
            if k == 'target_transform':
                # Named target transforms are instantiated here.
                v = [transforms.transforms[vv]() for vv in v]
            if k == 'kernel':
                # for scikitlearn kernels
                # A dict value maps kernel name -> {param: [values...]}; the
                # cartesian product of the parameter lists is expanded into
                # concrete kernel objects, each with an added WhiteKernel.
                if isinstance(v, dict):
                    expanded = []
                    for kernel_name, params in v.items():
                        params = OrderedDict(params)
                        combos = [dict(zip(params.keys(), combo))
                                  for combo in product(*params.values())]
                        expanded += [kernels[kernel_name](**c) + WhiteKernel()
                                     for c in combos]
                    v = expanded
            # Namespace each hyperparameter for the algorithm pipeline step.
            param_dict[config.optimisation['algorithm'] + '__' + k] = v
    pipe = Pipeline(steps=steps)
    # NOTE(review): the ``iid`` keyword was removed in scikit-learn 0.24;
    # this call will fail on modern versions — confirm the pinned version.
    estimator = GridSearchCV(
        pipe,
        param_dict,
        n_jobs=config.n_jobs,
        iid=False,
        scoring=scoring,
        refit=False,
        pre_dispatch='2*n_jobs',
        verbose=True,
        cv=5,
    )
    return estimator, scoring