    def get_db(self, dataset):
        if dataset in self.baseline_dbs.keys():
            return self.baseline_dbs[dataset]
        elif dataset in self.baseline_db_files.keys():
            self._load_baseline_db(dataset)
            return self.baseline_dbs[dataset]
        else:
            Logger.log(f"could not find baseline db for dataset: {dataset}", "ERROR")

    def _process_response(self, response):
        if response.status_code == 200:
            return True
        elif response.status_code == 404:
            Logger.log("could not reach server", "ERROR")
            return False
        else:
            Logger.log("unknown error", "ERROR")
            return False
Example #3
    def _ask(self):
        if self.grid_created is False:
            self._create_grid()
        param = self.samples.pop(0)

        if len(self.samples) == 0:
            message = 'Last parameter being provided - there will not be any more available samples in the grid.'
            Logger.log(message, 'INFO')

        return ParameterVector(array=param, param_space=self.param_space)
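The `_ask` above assumes `self.samples` was pre-filled by `_create_grid`. A minimal, hedged sketch of how such a full-factorial grid could be built with `itertools.product` (the `make_grid` helper and its arguments are illustrative, not the actual Olympus implementation):

import itertools
import numpy as np

def make_grid(bounds, levels):
    # one evenly spaced axis per parameter dimension
    axes = [np.linspace(low, high, levels) for low, high in bounds]
    # the cartesian product of the axes is the full grid, consumed via pop(0)
    return [np.array(point) for point in itertools.product(*axes)]

samples = make_grid(bounds=[(0.0, 1.0), (0.0, 1.0)], levels=3)
assert len(samples) == 9  # 3 levels ** 2 dimensions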
    def maxima(self):
        message = 'DiscreteMichalewicz has an infinite number of maxima'
        Logger.log(message, 'INFO')
        # enumerate the corners of the unit hypercube as representative maxima
        maxima = []
        params = product([0, 1], repeat=self.param_dim)
        for param in params:
            param = list(param)
            value = self._run(param)
            maxima.append({'params': param, 'value': value})
        return maxima
    def list(self):
        Logger.log("connecting to server", "INFO")
        url = f"{self.URL}/list_datasets"
        print("URL", url)
        response = requests.post(url, data={})
        if response.status_code == 200:
            datasets = response.json()["datasets"]
            return sorted(datasets)
        else:
            return self._process_response(response)
Example #6
def check_module(module_name, message, **kwargs):
    try:
        _ = __import__(module_name)
    except ModuleNotFoundError:
        import traceback
        from olympus import Logger
        error = traceback.format_exc()
        for line in error.split('\n'):
            if 'ModuleNotFoundError' in line:
                module = line.strip().strip("'").split("'")[-1]
        kwargs.update(locals())
        message = f'{message}'.format(**kwargs)
        Logger.log(message, 'ERROR')
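A hedged usage sketch: since `kwargs.update(locals())` captures the `module` name parsed from the traceback, the message template can reference it (the module name and message below are illustrative):

check_module('pyDOE', 'This planner requires {module}; install it with "pip install {module}".')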
Example #7
    def _validate_args(transformations):
        # check validity of transformation argument
        for transformation in transformations:
            if not (hasattr(DataTransformer, f'_forward_{transformation}') and
                    hasattr(DataTransformer, f'_backward_{transformation}')):
                raise NotImplementedError(
                    f'transformation {transformation} not implemented. Please select one of the '
                    f'available transformations.')

        if 'periodic' in transformations and transformations.index(
                'periodic') != 0:
            message = 'periodic transform is allowed only as the first transformation'
            Logger.log(message, 'ERROR')
Example #8
    def __init__(self,
                 planner,
                 emulator=None,
                 surface=None,
                 campaign=Campaign(),
                 database=None):
        """ The Evaluator does higher level operations that Planners and 
        Emulators do not do on their own. For instance, communicating parameters 
        and measurements to each other, keeping track of them ensuring they 
        match, and storing these in a Campaign object. All this can also be done 
        by the user using planner, emulator and campaign objects, which might 
        allow more customization. However, Evaluator provides a convenient 
        higher-level interface for common optimization tasks.

        Args:
            planner (Planner): an instance of a Planner.
            emulator (Emulator): an instance of a trained Emulator.
            surface (Surface): an instance of a Surface.
            campaign (Campaign): an instance of a Campaign. By default, a new 
                Campaign instance is created. If this is set to None, no campaign 
                info will be stored.
            database (object): ...
        """
        Object.__init__(**locals())

        if emulator is not None:
            assert surface is None
            self.emulator_type = 'numerical'
        elif surface is not None:
            assert emulator is None
            self.emulator_type = 'analytic'
            self.emulator = surface
        else:
            Logger.log('One of emulator or surface needs to be provided',
                       'FATAL')

        # if isinstance(self.emulator, Emulator):
        #     self.emulator_type = 'numerical'
        # elif isinstance(self.emulator, Surface):
        #     self.emulator_type = 'analytic'

        # provide the planner with the parameter space.
        # NOTE: right now, outside of Evaluator, the param_space for a planner
        #       needs to be set "manually" by the user
        self.planner.set_param_space(
            self.emulator.param_space
        )  # param space in emulator as it originates from dataset

        if self.campaign is not None:
            self.campaign.set_planner_specs(planner)
            self.campaign.set_emulator_specs(emulator)
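A minimal, hedged usage sketch for the constructor above, pairing a planner with an analytic surface (the planner and surface kinds, and the `optimize` entry point, are assumptions about the Olympus API):

from olympus import Campaign, Evaluator
from olympus.planners import Planner
from olympus.surfaces import Surface

planner = Planner(kind='RandomSearch')   # any registered planner
surface = Surface(kind='Dejong')         # analytic surface -> emulator_type 'analytic'
evaluator = Evaluator(planner=planner, surface=surface, campaign=Campaign())
# evaluator.optimize(num_iter=10)        # assumed driver for the optimization loop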
Example #9
def _validate_dataset_args(kind, data, columns, target_names):
    if kind is not None:
        # -----------------------------------
        # check that a correct name is passed
        # -----------------------------------
        # TODO: reduce redundant code by importing the list from where we have it already
        module_path = os.path.dirname(os.path.abspath(__file__))
        olympus_datasets = []
        for dir_name in glob(f"{module_path}/dataset_*"):
            dir_name = dir_name.split("/")[-1][8:]
            olympus_datasets.append(dir_name)
        if kind not in olympus_datasets:
            message = (
                "Could not find dataset `{0}`. Please choose from one of the available "
                "datasets: {1}.".format(kind,
                                        ", ".join(list(olympus_datasets))))
            Logger.log(message, "FATAL")
        # --------------------------------------------------------------
        # we will discard these arguments, so check if they are provided
        # --------------------------------------------------------------
        if data is not None:
            message = (
                "One of the Olympus datasets has been loaded via the argument `kind`, argument `data` "
                "will be discarded")
            Logger.log(message, "WARNING")
        if columns is not None:
            message = (
                "One of the Olympus datasets has been loaded via the argument `kind`, argument `columns` "
                "will be discarded")
            Logger.log(message, "WARNING")
        if target_names is not None:
            message = (
                "One of the Olympus datasets has been loaded via the argument `kind`, argument "
                "`target_names` will be discarded")
            Logger.log(message, "WARNING")
Example #10
    def _create_optimizer(self):
        from pyDOE import lhs
        if self.budget is None:
            message = (
                f'Please provide a number of samples for this planner. Since no number of samples was '
                f'provided, falling back to setting `budget` to {len(self.param_space)}')
            Logger.log(message, 'WARNING')
            self.budget = len(self.param_space)
        self.samples = lhs(len(self.param_space), samples=self.budget)
        # stretch the unit-cube samples to each parameter's [low, high] interval
        for index, param in enumerate(self.param_space):
            self.samples[:, index] = (
                param.high - param.low) * self.samples[:, index] + param.low
        self.samples = list(self.samples)
        self.has_optimizer = True
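The scaling loop above stretches unit-cube samples to each parameter's interval. A self-contained, hedged sketch of the same step using plain numpy in place of `pyDOE.lhs` (the bounds are illustrative):

import numpy as np

rng = np.random.default_rng(seed=42)
unit_samples = rng.random((8, 2))      # stand-in for lhs(2, samples=8)
bounds = [(0.0, 10.0), (-1.0, 1.0)]    # (low, high) per dimension
scaled = np.empty_like(unit_samples)
for index, (low, high) in enumerate(bounds):
    scaled[:, index] = (high - low) * unit_samples[:, index] + low
assert scaled[:, 0].min() >= 0.0 and scaled[:, 0].max() <= 10.0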
Example #11
def _validate_planner_kind(kind):
    # if we received a string
    if isinstance(kind, str):
        from . import PlannerLoader

        kind = PlannerLoader.file_to_class(kind)
        avail_planners = get_planners_list()
        if kind not in avail_planners:
            message = ('Planner "{0}" not available in Olympus. Please choose '
                       "from one of the available planners: {1}".format(
                           kind, ", ".join(avail_planners)))
            Logger.log(message, "FATAL")

    # if we get an instance of a planner class
    elif isinstance(kind, AbstractPlanner):
        # make sure it has the necessary methods
        for method in ["_set_param_space", "_tell", "_ask"]:
            implementation = getattr(kind, method, None)
            if not callable(implementation):
                message = f'The object {kind} does not implement the necessary method "{method}"'
                Logger.log(message, "FATAL")

    # if we received a custom planner class
    elif isinstance(kind, type) and issubclass(kind, AbstractPlanner):
        # make sure it has the necessary methods
        for method in ["_set_param_space", "_tell", "_ask"]:
            implementation = getattr(kind, method, None)
            if not callable(implementation):
                message = f'The object {kind} does not implement the necessary method "{method}"'
                Logger.log(message, "FATAL")

    # if we do not know what was passed raise an error
    else:
        message = 'Could not initialize Planner: the argument "kind" is neither a string nor an AbstractPlanner subclass'
        Logger.log(message, "FATAL")
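A hedged sketch of a custom planner class that would pass the subclass branch above, since it implements the three required methods (the import path and method bodies are assumptions):

from olympus.planners import AbstractPlanner

class MyPlanner(AbstractPlanner):
    def _set_param_space(self, param_space):
        self.param_space = param_space
    def _tell(self, observations):
        self._observations = observations
    def _ask(self):
        return None  # would return the next parameters to evaluate

_validate_planner_kind(MyPlanner)  # all three methods are callable -> passes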
    def run(self, params, return_paramvector=False):
        """Evaluate the surface at the chosen location.

        Args:
            params (array): Set of input parameters for which to return the function value.
            return_paramvector (bool): Whether to return a ``ParameterVector`` object instead of a list of lists.
                Default is False.

        Returns:
            values (list): function values evaluated at the chosen locations; a list of
                ParameterVector objects if ``return_paramvector`` is True.
        """
        if isinstance(params, (float, int)):
            params = np.array([params])
        elif isinstance(params, list):
            params = np.array(params)
        if len(params.shape) == 1:
            params = np.expand_dims(params, axis=0)

        # TODO: these validations could be moved to ParameterSpace class
        # validate params
        if params.shape[1] != len(self.param_space):
            message = (f'Dimensionality of provided params ({params.shape[1]}) does not match the expected '
                       f'dimensionality ({len(self.param_space)})')
            Logger.log(message, 'ERROR')

        # this raises warnings for out-of-bounds parameters
        for param_set in params:
            self.param_space.validate(param_set)

        # get values from the surface class
        y_preds = [[self._run(param_set)] for param_set in params]  # 2d array

        # if we are not asking for a ParamVector, we can just return y_preds
        if return_paramvector is False:
            return y_preds

        # return a ParameterVector
        # NOTE: while we do not allow batches or multiple objectives yet, this code is supposed to be able to support
        #  those
        y_pred_objects = []  # list of ParamVectors with all samples and objectives
        # iterate over all samples (if we returned a batch of predictions)
        for y_pred in y_preds:
            y_pred_object = ParameterVector()
            # iterate over all objectives/targets
            for target_name, y in zip(['target_0'], y_pred):
                y_pred_object.from_dict({target_name: y})
            # append object to list
            y_pred_objects.append(y_pred_object)

        return y_pred_objects
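A hedged usage sketch for `run` on a two-dimensional analytic surface (the surface kind and `param_dim` argument are assumptions about the Surface API):

from olympus.surfaces import Surface

surface = Surface(kind='Dejong', param_dim=2)
y_single = surface.run([0.5, 0.5])                  # -> [[value]]
y_batch = surface.run([[0.1, 0.2], [0.8, 0.9]])     # -> [[v1], [v2]]
y_vectors = surface.run([0.5, 0.5], return_paramvector=True)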
Example #13
def _validate_noise_kind(kind):
    # if we received a string
    if isinstance(kind, str):
        avail_noises = get_noises_list()
        if kind not in avail_noises:
            message = ('Noise "{0}" not available in Olympus. Please choose '
                       'from one of the available noise objects: {1}'.format(kind, ', '.join(avail_noises)))
            Logger.log(message, 'FATAL')

    # if we get an instance of a noise class
    elif isinstance(kind, AbstractNoise):
        # make sure it has the necessary methods
        for method in ['_add_noise']:
            implementation = getattr(kind, method, None)
            if not callable(implementation):
                message = f'The object {kind} does not implement the necessary method "{method}"'
                Logger.log(message, 'FATAL')

    # if we received a custom noise class
    elif isinstance(kind, type) and issubclass(kind, AbstractNoise):
        # make sure it has the necessary methods
        for method in ['_add_noise']:
            implementation = getattr(kind, method, None)
            if not callable(implementation):
                message = f'The object {kind} does not implement the necessary method "{method}"'
                Logger.log(message, 'FATAL')

    # if we do not know what was passed raise an error
    else:
        message = 'Could not initialize Noise: the argument "kind" is neither a string nor an AbstractNoise subclass'
        Logger.log(message, 'FATAL')
Example #14
    def list(self):
        Logger.log("connecting to github", "INFO")
        tmp_file = "remote_folders"
        remote_datasets = []
        subprocess.call(f"svn ls -R {self.URL} > {tmp_file}", shell=True)
        with open(tmp_file, "r") as content:
            for line in content:
                dataset_name = line.split("/")[0]
                if dataset_name not in remote_datasets:
                    remote_datasets.append(dataset_name)
        os.remove(tmp_file)
        # strip the "dataset_" prefix from the remote folder names
        remote_datasets = [
            remote_dataset[8:] for remote_dataset in remote_datasets
        ]
        return sorted(remote_datasets)
Example #15
    def train(self, data):
        """Computes the statistics (e.g. mean and standard deviation) needed for the chosen transformation from the
        provided dataset. With the exception of the 'identity' transform, the DataTransformer always needs to be
        trained before the `transform` and `back_transform` methods can be used.

        Args:
            data (array, Dataset): the data used to compute the statistics needed for the transformation. This can
                be a 2d numpy array, or a Dataset object.
        """

        self._dims = None  # reset _dims if we retrain the DataTransformer

        # for splitting periodic variables we need a dataset, so that we can check which variables
        # are periodic and what their lower/upper bounds are
        if 'periodic' in self.transformations:
            if not isinstance(data, Dataset):
                message = 'in order to transform periodic variables you need to provide a Dataset object as the data argument'
                Logger.log(message, 'ERROR')

            # remember the input dimensions
            self._dims = np.shape(data.data)[1]

            # extract the info about periodic variables
            self._parse_dataset_for_periodic(data)

            # Now swap dataset for data after periodic transform. This is done just in case the periodic transform is
            # composed with other transformations that will then require operating on a higher dimensional array
            # the means, stddev etc. statistics will need to have matching dimensions
            data = self._forward_periodic(data.data.to_numpy())
        else:
            # allow passing a dataset
            if isinstance(data, Dataset):
                data = data.data.to_numpy()
            self._validate_data(data)
            # remember the input dimensions
            self._dims = np.shape(data)[1]

        # ------------------------
        # Get stats about the data
        # ------------------------
        data = np.array(data)

        self._mean = np.mean(data, axis=0)
        self._stddev = np.std(data, axis=0)
        self._min = np.amin(data, axis=0)
        self._max = np.amax(data, axis=0)

        self.trained = True
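A hedged round-trip sketch for `train`/`transform`/`back_transform` on a plain 2d array (the import path and the 'standardize' transformation name are assumptions):

import numpy as np
from olympus.utils.data_transformer import DataTransformer

data = np.random.uniform(low=0.0, high=10.0, size=(100, 3))
transformer = DataTransformer(transformations=['standardize'])
transformer.train(data)
scaled = transformer.transform(data)            # ~zero mean, unit variance per column
restored = transformer.back_transform(scaled)   # recovers the original values
assert np.allclose(restored, data)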
    def add(self, param):
        """Add one or more parameters to the parameter space.

        Args:
            param (ObjectParameter or list): a parameter, or list of parameters,
                to append to the space.
        """
        if isinstance(param, ObjectParameter):
            self._add_param(param)
        elif isinstance(param, list):
            for _param in param:
                self._add_param(_param)
        else:
            Logger.log('Please provide a valid parameter', 'ERROR')
    def get_param(self, name):
        """Retrieve a parameter by name.

        Args:
            name (str): name of the parameter to look up.

        Returns:
            the matching parameter, or None if not found.
        """
        for param in self.parameters:
            if param['name'] == name:
                return param
        message = 'Could not find Parameter with name {0} in {1}'.format(
            name, str(self))
        Logger.log(message, 'WARNING')
        return None
Example #18
    def _guess_db_kind(self, file_name):
        from . import databases

        file_type = file_name.split(".")[-1]
        for db_kind, db in databases.items():
            if db.is_valid_file_type(file_type):
                break
        else:
            from . import db_types

            Logger.log(
                "Could not find database type {}. Please choose from {}".
                format(file_type, db_types),
                "ERROR",
            )
            return None
        return db_kind
Example #19
    def add_db(self, kind, *args, **kwargs):
        try:
            database = __import__(f"olympus.databases.database_{kind}",
                                  fromlist=[f"Wrapper_{kind}"])
        except ModuleNotFoundError:
            Logger.log(" ... proceeding with pickle database",
                       "INFO",
                       only_once=True)
            kind = "pickle"
            database = __import__(f"olympus.databases.database_{kind}",
                                  fromlist=[f"Wrapper_{kind}"])

        database = getattr(database, f"Wrapper_{kind}")
        db = database(*args, **kwargs)
        self.dbs[db.name] = db
        if self.active_db is None:
            self.active_db = db
Example #20
    def _get_dataset(dataset_name):

        # check if dataset already exists
        expected_files = ["dataset.zip", "data.csv", "description.txt"]
        exists = False
        for expected_file in expected_files:
            exists = exists or os.path.isfile(
                f"{__home__}/datasets/dataset_{dataset_name}/{expected_file}")
        if exists:
            Logger.log("The dataset already exists", "INFO")
            return

        # download dataset, check with github first
        for Connector in [ConnectorGithub, ConnectorServer]:
            success = Connector().get_dataset(dataset_name)
            if success:
                break
Example #21
    def __init__(self, kind='continuous', **kwargs):
        if kind in self.KINDS:
            self.kind = kind
            for prop in dir(self.KINDS[kind]):
                if prop.startswith('ATT_'):
                    setattr(self, prop, getattr(self.KINDS[kind], prop))
            self.KINDS[kind].__init__(self)
            for key, value in kwargs.items():
                if 'ATT_{}'.format(key.upper()) in dir(self):
                    self.add(key, value)
            if not self.KINDS[kind]._validate(self):
                message = 'Could not validate {}'.format(str(self))
                Logger.log(message, 'WARNING')
        else:
            message = ('Could not initialize parameter: kind {} is unknown. '
                       'Please choose from {}'.format(kind, ','.join(list(self.KINDS.keys()))))
            Logger.log(message, 'ERROR')
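A hedged usage sketch for the constructor above: the `ATT_`-prefixed attributes of the chosen kind determine which keyword arguments are accepted (the attribute names below are illustrative):

param = Parameter(kind='continuous', name='temperature', low=0.0, high=100.0)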
    def _register_dbs(self, datasets_):
        # only register complete baselines
        datasets = datasets_.copy()
        self.baseline_db_files = {}
        self.baseline_dbs = {}
        for db_file in glob.glob(f"{__home__}/db_baseline_*sqlite"):
            # extract the dataset name from "db_baseline_<dataset>.sqlite"
            dataset = db_file.split("/")[-1].split(".")[0][12:]
            self.baseline_db_files[dataset] = db_file
            if dataset in datasets:
                datasets.remove(dataset)
            else:
                Logger.log(
                    f"found complete baseline for unreported dataset: {dataset}",
                    "WARNING",
                )
        for dataset in datasets:
            Logger.log(
                f"could not find complete baseline for dataset: {dataset}", "WARNING"
            )
    def __init__(self, from_dict=None, from_json=None, name='CustomConfig'):
        """
        Args:
            from_dict (dict): dictionary from which to populate the Config.
            from_json (str): path to a JSON file from which to populate the Config.
            name (str): name of the Config object.
        """
        super(Config, self).__init__(me=name)
        self.name = name

        if from_dict is not None and from_json is not None:
            message = 'you have passed both "from_dict" and "from_json" arguments to Config: "from_json" will be discarded'
            Logger.log(message, 'WARNING')

        if from_dict is not None:
            self.from_dict(info_dict=from_dict)
        elif from_json is not None:
            self.from_json(json_file=from_json)
    def _generate_first_population(self):
        if self.verbose is True:
            Logger.log(f'Creating first population of size {self.pop_size}',
                       'INFO')

        # Structure initializers
        bounds = [param['domain'] for param in self._param_space]
        self.toolbox.register("individual",
                              self.initIndividual,
                              icls=creator.Individual,
                              bounds=bounds)
        self.toolbox.register("population", tools.initRepeat, list,
                              self.toolbox.individual)
        self.pop = self.toolbox.population(n=self.pop_size)
        self.latest_pop_size = self.pop_size

        # delete creator classes
        del creator.Individual
        del creator.FitnessMin
    def _load_summaries(self, datasets_):
        datasets = datasets_.copy()
        self.baseline_summaries = {}
        # nothing to do if the summary is not available
        if not os.path.isfile(self.summary_file):
            return
        # load summary
        with open(self.summary_file, "rb") as content:
            baseline_summaries = pickle.load(content)
        for dataset, summary in baseline_summaries.items():
            if dataset in datasets:
                self.baseline_summaries[dataset] = summary
                datasets.remove(dataset)
            else:
                Logger.log(
                    f"found summary for unreported dataset: {dataset}", "WARNING"
                )
        for dataset in datasets:
            Logger.log(f"could not find summary for dataset: {dataset}", "WARNING")
    def _set_dataset(self, dataset):
        """ registers a dataset for emulator

        Args:
            dataset (str): name of available dataset, or Dataset object.
        """
        if isinstance(dataset, str):
            self.dataset = Dataset(kind=dataset)
        elif isinstance(dataset, Dataset):
            self.dataset = dataset
        else:
            raise NotImplementedError

        # check that the param_space is defined
        if self.dataset.param_space is None:
            message = (
                "The param_space information is not present in the Dataset object provided. Please use "
                "Dataset.set_param_space to define the type of variables present in the dataset before "
                "instantiating the Emulator.")
            Logger.log(message, "ERROR")
    def validate(self, param_vector):
        """Check that all entries of a parameter vector lie within the parameter bounds.

        Args:
            param_vector (array): parameter values to validate.

        Returns:
            bool: True if all parameters are within bounds, False otherwise.
        """
        bounds = self.param_bounds
        valid = True
        for i, bound in enumerate(bounds):
            entry = param_vector[i]
            valid = valid and bound[0] <= entry
            valid = valid and entry <= bound[1]

        if valid is False:
            message = f'Not all parameters of {param_vector} are within bounds!'
            Logger.log(message, 'WARNING')

        return valid
    def get(self, dataset, kind="summary"):
        """ Retrieves baseline for a given dataset

        Args:
            dataset (str): name of the dataset for which baseline should be retrieved
            kind (str): indicates format of baseline; choose from "summary", "db", or "campaigns"

        Returns:
            requested baseline
        """
        if kind == "summary":
            return self.get_summary(dataset)
        elif kind == "db":
            return self.get_db(dataset)
        elif kind == "campaigns":
            return self.get_campaigns(dataset)
        else:
            Logger.log(
                f'could not understand kind: "{kind}". Please choose from "summary", "db", or "campaigns"',
                "ERROR",
            )
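A hedged usage sketch for `get` (the `Baselines` class name and the 'hplc' dataset are assumptions based on the snippets above):

baselines = Baselines()
summary = baselines.get('hplc', kind='summary')
db = baselines.get('hplc', kind='db')
campaigns = baselines.get('hplc', kind='campaigns')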
Example #29
def load_dataset(kind):
    """Loads a dataset from the Olympus dataset library.

    Args:
        kind (str): kind of Olympus dataset to load.

    Returns:
        (tuple): tuple containing:
            data (array): numpy array where each row is a sample and each column is a feature/target. The columns
            are sorted such that features come first, followed by targets.
            config (dict): dict containing information on the features and targets present in data.
            description (str): string describing the dataset.
    """

    _validate_dataset_args(kind=kind,
                           data=None,
                           columns=None,
                           target_names=None)
    datasets_path = os.path.dirname(os.path.abspath(__file__))

    # load description
    with open(f"{datasets_path}/dataset_{kind}/description.txt") as txtfile:
        description = txtfile.read()

    # load info on features/targets
    with open(f"{datasets_path}/dataset_{kind}/config.json", "r") as content:
        config = json.loads(content.read())

    # load data
    csv_file = f"{datasets_path}/dataset_{kind}/data.csv"
    try:
        data = read_csv(csv_file, header=None).to_numpy()
    except FileNotFoundError:
        Logger.log(f"Could not find data.csv for dataset {kind}", "FATAL")

    return data, config, description
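A hedged usage sketch for `load_dataset` (the 'alkox' dataset name is illustrative; any folder matching datasets/dataset_* would work):

data, config, description = load_dataset('alkox')
print(data.shape)        # rows are samples; feature columns precede target columns
print(description[:80])  # first characters of the dataset description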
Example #30
    def _set_param_space(self, param_space):
        self.param_space = param_space
        self.dims = len(self.param_space)
        self.bounds = self.param_space.param_bounds

        # if budget provided, define levels
        if self.budget is not None:
            self._get_approximate_levels()

        # allow providing a list of levels to tune the budget
        if isinstance(self.levels, int):
            self._levels = [self.levels] * self.dims
        elif isinstance(self.levels, (list, np.ndarray)):
            if len(self.levels) != self.dims:
                message = (
                    f'The number of levels provided ({len(self.levels)}) does not match the dimensionality of the '
                    f'parameter space ({self.dims}).')
                Logger.log(message, 'ERROR')
            self._levels = list(self.levels)
        else:
            raise ValueError(
                'Argument `levels` can only be an integer or a list.')
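A stand-alone sketch of the broadcast rule implemented above: an integer is replicated across dimensions, while a list must match the dimensionality (the names are illustrative):

dims = 3
levels = 5
_levels = [levels] * dims if isinstance(levels, int) else list(levels)
assert _levels == [5, 5, 5]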