def get_samples(self, over_sample_rate=10):
    """
    Get samples from the sampler.

    This returns samples as a list of dictionaries, with the
    sample variables as the keys:

    .. code:: python

        [{'b': 0.89856, 'a': 1}, {'b': 0.923223, 'a': 1}, ... ]

    A ``random`` sampler is built from a copy of this sampler's data with
    ``num_samples`` multiplied by ``over_sample_rate``; its samples are
    then downselected back to the requested ``num_samples``.  The result
    is cached on the instance and returned directly on later calls.

    :param over_sample_rate: factor applied to ``num_samples`` to size the
        pool of random candidates.
    :returns: list of sample dictionaries.
    """
    if self._samples is not None:
        return self._samples

    # The cache is deliberately left untouched until sampling succeeds.
    # Assigning an empty list up front would make a failed attempt look
    # like a valid (empty) cached result on the next call.
    new_sampling_dict = self.data.copy()
    new_sampling_dict["num_samples"] *= over_sample_rate
    new_sampling_dict["type"] = "random"
    new_random_sample = RandomSampler(new_sampling_dict)
    new_random_sample.get_samples()

    try:
        new_random_sample.downselect(self.data["num_samples"])
    except Exception as exception:
        log_and_raise_exception(
            f"Error during 'downselect' in 'best_candidate' "
            f"sampling: {exception}")

    self._samples = new_random_sample._samples
    return self._samples
def check_validity(self):
    """
    Validate the data for the 'custom' sampler.

    Runs the parent class validation first, then reports an error if the
    module path does not exist or if no ``sample_function`` is set.
    """
    super().check_validity()

    module_missing = not self.path.exists()
    if module_missing:
        log_and_raise_exception(
            f"Unable to find module {self.path} for 'custom' sampler")

    function_undefined = self.sample_function is None
    if function_undefined:
        log_and_raise_exception("The 'custom' sampler requires "
                                "'sample_function' to be defined.")
def check_validity(self):
    """
    Validate the data for the CsvSampler.

    Runs the parent class validation first, then reports an error if the
    csv path is not a file or if any parameter read from it has no values.
    Finally, when the sampler data has a ``parameters`` entry, its items
    are checked for uniform lengths; a ``KeyError`` from that step is
    ignored.
    """
    super().check_validity()

    if not self.path.is_file():
        log_and_raise_exception(
            f"Could not find file {self.path} for CsvSampler.")

    for key, column_values in self.csv_data.items():
        if len(column_values) != 0:
            continue
        log_and_raise_exception(f"No values associated with parameter "
                                f"{key} from file {self.path}.")

    # Both the lookup and the length test stay inside the suppress block
    # so that a KeyError from either of them is ignored.
    with suppress(KeyError):
        parameter_items = self.data['parameters'].items()
        test_for_uniform_lengths(parameter_items)
def check_validity(self):
    """
    Validate the sampler data with ``validate_sampler``.

    Each exception type raised by the validation is translated into its
    own error message and passed to ``log_and_raise_exception``.
    """
    try:
        validate_sampler(self.data)
    except ValueError:
        message = f"No 'type' entry found in sampler data {self.data}"
        log_and_raise_exception(message)
    except KeyError:
        message = f"Sampler type {self.data['type']} not found in schema"
        log_and_raise_exception(message)
    except ValidationError:
        message = "Sampler data is invalid"
        log_and_raise_exception(message)
def new_sampler(sampler_data):
    """
    Dispatch the sampler for the requested sampler data.

    If there is no ``type`` entry in the data, it will raise a
    ``SamplingError``. If the ``type`` entry does not match one of the
    built-in samplers, it will raise a ``SamplingError``.
    Currently the built-in samplers are:

    | * ``best_candidate``
    | * ``column_list``
    | * ``cross_product``
    | * ``csv``
    | * ``custom``
    | * ``list``
    | * ``random``

    :param sampler_data: data to validate.
    :returns: Sampler object matching the data.
    """
    has_type = 'type' in sampler_data
    if not has_type:
        log_and_raise_exception(
            f"No type entry in sampler data {sampler_data}")

    registry = BaseSampler.SAMPLE_FUNCTIONS_DICT
    try:
        sampler = registry[sampler_data['type']]
    except KeyError:
        log_and_raise_exception(f"{sampler_data['type']} " +
                                "is not a recognized sampler type")

    # Errors raised while building the sampler are logged and re-raised.
    try:
        instance = sampler(sampler_data)
    except SamplingError as exception:
        log_and_raise_exception(exception)
    else:
        return instance
def check_validity(self):
    """
    Validate the sampler data, including numeric ``min``/``max`` bounds.

    Runs the parent class validation and the variable checks, rejects the
    unsupported ``previous_samples`` entry, and then verifies that every
    parameter has ``min`` and ``max`` entries that ``float()`` accepts.

    A bound that is missing (``KeyError``), of a non-numeric type such as
    ``None`` or a list (``TypeError``), or an unparsable string
    (``ValueError``) is reported through ``log_and_raise_exception``
    instead of escaping as a raw exception.
    """
    super().check_validity()
    self._check_variables()

    # @TODO: test that file exists and it contains the right parameters
    if 'previous_samples' in self.data:
        log_and_raise_exception(
            "'previous_samples' is not yet supported.\n"
            " Please contact Chris Krenn or Brian Daub for assistance.")

    # @TODO: add error check to schema
    invalid_bound = (KeyError, TypeError, ValueError)
    for key, value in self.data["parameters"].items():
        try:
            float(value['min'])
        except invalid_bound:
            log_and_raise_exception(
                f"Parameter ({key}) must have a numeric minimum.\n"
                f" Current minimum value is: {value}.")
        try:
            float(value['max'])
        except invalid_bound:
            log_and_raise_exception(
                f"Parameter ({key}) must have a numeric maximum.\n"
                f" Current maximum value is: {value}.")
def downselect(self, samples):
    """
    Downselect samples based on specification in sampling_dict.

    Prototype dictionary::

        num_samples: 30
        previous_samples: samples.csv    # optional
        parameters:
            X1:
                min: 10
                max: 50
            X2:
                min: 10
                max: 50

    The rows of ``self._samples`` are the candidates.  Points are chosen
    one at a time: each round builds a KD-tree of the points selected so
    far and picks the candidate whose distance to its nearest selected
    point is largest.  Without ``previous_samples`` the first candidate
    seeds the selection; with it, the points read from that csv file seed
    the tree but are not part of the result.

    On success ``self._samples`` is replaced by the selected rows, as a
    list of dictionaries.

    :param samples: number of samples to keep.
    :raises Exception: if the ``previous_samples`` file cannot be read, or
        if no candidate lies at a distance greater than zero from the
        points already selected.
    """
    if not PANDAS_PLUS:
        log_and_raise_exception(
            "This function requires pandas, numpy & scipy packages")

    df = pd.DataFrame.from_dict(self._samples)
    columns = self.parameters
    candidates = df[columns].values.tolist()
    num_points = samples

    if 'previous_samples' not in self.data:
        # Seed the selection with the first candidate.
        sample_points = [candidates[0]]
        new_sample_ids = [0]
        n0 = 1
    else:
        try:
            previous_samples = pd.read_csv(self.data["previous_samples"])
        except (OSError, ValueError) as error:
            # A missing or unreadable file raises OSError; an empty or
            # malformed one raises a ValueError subclass.
            raise Exception(
                "Error opening previous_samples datafile:"
                + str(self.data["previous_samples"])) from error
        sample_points = previous_samples[columns].values.tolist()
        new_sample_ids = []
        n0 = 0

    candidate_array = np.asarray(candidates, dtype=float)

    # Per-dimension extrema over all candidates (informational only).
    mins = candidate_array.min(axis=0)
    maxs = candidate_array.max(axis=0)
    print("extrema for new input_labels: ", mins, maxs)
    print("down sampling to %d best candidates from %d total points."
          % (num_points, len(candidates)))

    for _ in range(n0, num_points):
        tree = spatial.KDTree(np.asarray(sample_points))
        # Distance from every candidate to its nearest selected point.
        distances = tree.query(candidate_array)[0]
        # argmax returns the first maximum, so ties go to the lowest index.
        best = int(np.argmax(distances))
        if not distances[best] > 0.0:
            raise Exception(
                "During 'downselect', failed to find any "
                "candidates in the tree.")
        sample_points.append(candidates[best])
        new_sample_ids.append(best)

    # DataFrame.append was removed in pandas 2.0; positional selection
    # keeps the rows in selection order.
    self._samples = df.iloc[new_sample_ids].to_dict(orient='records')
def _check_variables_for_dups(self):
    """Report an error if any entry of ``self.parameters`` is repeated."""
    unique_count = len(set(self.parameters))
    if len(self.parameters) == unique_count:
        return

    dupes = set(find_duplicates(self.parameters))
    log_and_raise_exception(
        "The following constants or parameters are defined more " +
        "than once: " + str(dupes))
def _check_variables_existence(self):
    """Report an error if ``self.parameters`` is empty."""
    if len(self.parameters) != 0:
        return

    log_and_raise_exception(
        "Either constants or parameters must be included in the " +
        "sampler data")
def _check_variables_strings(self):
    """Report an error for each entry of ``self.parameters`` that is not a string."""
    # The generator is lazy, so entries are examined one at a time and
    # each offending entry is reported as soon as it is reached.
    non_strings = (
        parameter for parameter in self.parameters
        if not isinstance(parameter, str))
    for _ in non_strings:
        log_and_raise_exception(
            "constants and parameters must be strings")