Ejemplo n.º 1
0
    def get_samples(self, over_sample_rate=10):
        """
        Get samples from the sampler.

        This returns samples as a list of dictionaries, with the
        sample variables as the keys:

        .. code:: python

            [{'b': 0.89856, 'a': 1}, {'b': 0.923223, 'a': 1}, ... ]

        The samples are produced by building a ``RandomSampler`` that
        draws ``num_samples * over_sample_rate`` points and then calling
        its ``downselect`` to reduce the pool to ``num_samples`` points.
        The result is cached on ``self._samples``; later calls return the
        cached list.

        :param over_sample_rate: multiplier applied to ``num_samples`` to
            size the random candidate pool.
        :returns: the list of sample dictionaries.
        """
        if self._samples is not None:
            return self._samples

        # Work on a copy so the sampler's own specification is untouched.
        oversampled_spec = self.data.copy()
        oversampled_spec["num_samples"] *= over_sample_rate
        oversampled_spec["type"] = "random"

        candidate_sampler = RandomSampler(oversampled_spec)
        candidate_sampler.get_samples()
        try:
            candidate_sampler.downselect(self.data["num_samples"])
        except Exception as exception:
            log_and_raise_exception(
                f"Error during 'downselect' in 'best_candidate' "
                f"sampling: {exception}")

        # The cache is assigned only once generation has finished, so a
        # failure above never leaves an empty list behind that a later
        # call would mistake for a valid (cached) result.
        self._samples = candidate_sampler._samples
        return self._samples
Ejemplo n.º 2
0
 def check_validity(self):
     """
     Validate the 'custom' sampler configuration.

     Runs the parent class checks first, then reports an error through
     ``log_and_raise_exception`` when ``self.path`` does not exist or
     when ``self.sample_function`` is ``None``.
     """
     super().check_validity()

     module_present = self.path.exists()
     if not module_present:
         missing_module = (
             f"Unable to find module {self.path} for 'custom' sampler")
         log_and_raise_exception(missing_module)

     if self.sample_function is None:
         missing_function = (
             "The 'custom' sampler requires 'sample_function' to be defined.")
         log_and_raise_exception(missing_function)
Ejemplo n.º 3
0
    def check_validity(self):
        """
        Validate the CSV sampler configuration.

        Runs the parent class checks, then reports an error through
        ``log_and_raise_exception`` when ``self.path`` is not a file or
        when any entry of ``self.csv_data`` has no values. Finally the
        ``parameters`` entry of ``self.data`` is passed to
        ``test_for_uniform_lengths``; a ``KeyError`` raised by that step
        is ignored.
        """
        super().check_validity()

        file_present = self.path.is_file()
        if not file_present:
            log_and_raise_exception(
                f"Could not find file {self.path} for CsvSampler.")

        for name, column in self.csv_data.items():
            if len(column) != 0:
                continue
            log_and_raise_exception(
                f"No values associated with parameter {name} "
                f"from file {self.path}.")

        # A KeyError here (e.g. no 'parameters' entry) is deliberately
        # swallowed, exactly as contextlib.suppress(KeyError) would.
        try:
            test_for_uniform_lengths(self.data['parameters'].items())
        except KeyError:
            pass
Ejemplo n.º 4
0
 def check_validity(self):
     """
     Validate ``self.data`` with ``validate_sampler``.

     Each failure type is translated into a message and reported through
     ``log_and_raise_exception``:

     | * ``ValueError`` -- no 'type' entry in the sampler data
     | * ``KeyError`` -- the sampler type is not in the schema
     | * ``ValidationError`` -- the sampler data is invalid
     """
     try:
         validate_sampler(self.data)
     except (ValueError, KeyError, ValidationError) as error:
         # Checked in the same order as separate except clauses would be.
         if isinstance(error, ValueError):
             message = f"No 'type' entry found in sampler data {self.data}"
         elif isinstance(error, KeyError):
             message = (
                 f"Sampler type {self.data['type']} not found in schema")
         else:
             message = "Sampler data is invalid"
         log_and_raise_exception(message)
Ejemplo n.º 5
0
def new_sampler(sampler_data):
    """
    Build the sampler object that matches ``sampler_data``.

    The ``type`` entry of the data selects the sampler class from
    ``BaseSampler.SAMPLE_FUNCTIONS_DICT``. Errors are reported through
    ``log_and_raise_exception`` (a ``SamplingError``) when:

    | * there is no ``type`` entry in the data,
    | * the ``type`` entry does not match a registered sampler,
    | * constructing the sampler raises a ``SamplingError``.

    Currently the built-in samplers are:

    | * ``best_candidate``
    | * ``column_list``
    | * ``cross_product``
    | * ``csv``
    | * ``custom``
    | * ``list``
    | * ``random``

    :param sampler_data: data to validate.
    :returns: Sampler object matching the data.
    """
    has_type = 'type' in sampler_data
    if not has_type:
        log_and_raise_exception(
            f"No type entry in sampler data {sampler_data}")

    registry = BaseSampler.SAMPLE_FUNCTIONS_DICT
    try:
        sampler_class = registry[sampler_data['type']]
    except KeyError:
        log_and_raise_exception(
            f"{sampler_data['type']} is not a recognized sampler type")

    try:
        built_sampler = sampler_class(sampler_data)
    except SamplingError as exception:
        log_and_raise_exception(exception)
    else:
        return built_sampler
Ejemplo n.º 6
0
    def check_validity(self):
        """
        Validate the best-candidate sampler configuration.

        Runs the parent class checks and ``self._check_variables()``,
        rejects the unsupported ``previous_samples`` option, and verifies
        that every parameter's ``min`` and ``max`` can be converted to
        ``float``. All problems are reported through
        ``log_and_raise_exception``.
        """
        super().check_validity()
        self._check_variables()

        # @TODO: test that file exists and it contains the right parameters
        if 'previous_samples' in self.data:
            log_and_raise_exception(
                "'previous_samples' is not yet supported.\n"
                "  Please contact Chris Krenn or Brian Daub for assistance.")

        # @TODO: add error check to schema
        for key, value in self.data["parameters"].items():
            for bound, label in (("min", "minimum"), ("max", "maximum")):
                try:
                    float(value[bound])
                except (TypeError, ValueError):
                    # float() raises TypeError (not ValueError) for values
                    # such as None or a list, so both are treated as
                    # "not numeric" instead of escaping as a raw traceback.
                    log_and_raise_exception(
                        f"Parameter ({key}) must have a numeric {label}.\n"
                        f"  Current {label} value is: {value}.")
Ejemplo n.º 7
0
    def downselect(self, samples):
        """
        Downselect samples based on specification in sampling_dict.

        Starting from a set of already-chosen points (the first candidate,
        or the rows of the ``previous_samples`` CSV file when that entry
        is present in ``self.data``), each round builds a KD-tree over the
        chosen points and adds the candidate whose distance to its nearest
        chosen point is largest. ``self._samples`` is replaced by the
        selected rows, as a list of dictionaries.

        Prototype dictionary::

           num_samples: 30
           previous_samples: samples.csv # optional
           parameters:
               X1:
                   min: 10
                   max: 50
               X2:
                   min: 10
                   max: 50

        :param samples: number of points to end up with (the selection
            loop runs from the number of seed points up to this value).
        :raises Exception: if the ``previous_samples`` file cannot be
            parsed, or if no candidate lies at a non-zero distance from
            the points already chosen.
        """
        if not PANDAS_PLUS:
            log_and_raise_exception(
                "This function requires pandas, numpy & scipy packages")

        df = pd.DataFrame.from_dict(self._samples)
        columns = self.parameters
        candidates = df[columns].values.tolist()
        num_points = samples

        if 'previous_samples' not in self.data:
            # Seed the selection with the first candidate.
            sample_points = [candidates[0]]
            new_sample_ids = [0]
            n0 = 1
        else:
            try:
                previous_samples = pd.read_csv(self.data["previous_samples"])
            except ValueError as error:
                raise Exception(
                    "Error opening previous_samples datafile:"
                    f"{self.data['previous_samples']}") from error
            sample_points = previous_samples[columns].values.tolist()
            new_sample_ids = []
            n0 = 0

        # Per-dimension extrema over all candidates (reported only).
        candidate_array = np.asarray(candidates, dtype=float)
        mins = candidate_array.min(axis=0)
        maxs = candidate_array.max(axis=0)
        print("extrema for new input_labels: ", mins, maxs)
        print("down sampling to %d best candidates from %d total points." % (
            num_points, len(candidates)))

        for _ in range(n0, num_points):
            tree = spatial.KDTree(np.asarray(sample_points))
            # Distance from every candidate to its nearest chosen point,
            # computed in a single batched query.
            distances = tree.query(candidate_array)[0]
            # argmax returns the first maximum, matching a strict
            # "greater than" scan in candidate order.
            best = int(np.argmax(distances))
            if not distances[best] > 0.0:
                raise Exception(
                    "During 'downselect', failed to find any "
                    "candidates in the tree.")
            sample_points.append(candidates[best])
            new_sample_ids.append(best)

        # Positional selection keeps the chosen order and the original
        # column dtypes; DataFrame.append is unavailable in pandas >= 2.0.
        selected = df.iloc[new_sample_ids]
        self._samples = selected.to_dict(orient='records')
Ejemplo n.º 8
0
 def _check_variables_for_dups(self):
     """
     Report an error when ``self.parameters`` contains repeated names.

     The repeated names are collected with ``find_duplicates`` and
     included in the message passed to ``log_and_raise_exception``.
     """
     unique_count = len(set(self.parameters))
     if len(self.parameters) == unique_count:
         return

     dupes = set(find_duplicates(self.parameters))
     log_and_raise_exception(
         "The following constants or parameters are defined more "
         f"than once: {dupes}")
Ejemplo n.º 9
0
 def _check_variables_existence(self):
     """
     Report an error through ``log_and_raise_exception`` when
     ``self.parameters`` is empty.
     """
     if len(self.parameters) > 0:
         return

     log_and_raise_exception(
         "Either constants or parameters must be included in the "
         "sampler data")
Ejemplo n.º 10
0
 def _check_variables_strings(self):
     """
     Report an error through ``log_and_raise_exception`` for any entry
     of ``self.parameters`` that is not a ``str``.
     """
     for name in self.parameters:
         if isinstance(name, str):
             continue
         log_and_raise_exception(
             "constants and parameters must be strings")