def _download_dataset(self, dataset_name):
    """Download a dataset archive from the server and unpack it locally.

    Posts ``dataset_name`` to ``{self.URL}/get_dataset``. On a 200 response the
    payload is written to ``{__home__}/datasets/dataset_<name>/dataset.zip``
    and extracted into the same directory with the ``unzip`` command. On a 204
    response the error reported by the server is logged. Any other status code
    is silently ignored.

    Args:
        dataset_name (str): name of the dataset to download.
    """
    Logger.log(f"downloading dataset {dataset_name}", "INFO")
    url = f"{self.URL}/get_dataset"
    data = {"dataset_name": dataset_name}
    response = requests.post(url, data=data)
    if response.status_code == 200:
        target_dir = f"{__home__}/datasets/dataset_{dataset_name}"
        target_name = f"{target_dir}/dataset.zip"
        # an already existing directory is fine; any other failure (e.g.
        # missing permissions) is no longer swallowed and surfaces here
        os.makedirs(target_dir, exist_ok=True)
        # save file
        Logger.log("saving dataset", "INFO")
        with open(target_name, "wb") as content:
            content.write(response.content)
        # unzip file
        Logger.log("unpacking dataset", "INFO")
        try:
            # argument list instead of a shell string: the dataset name is
            # never interpreted by a shell
            subprocess.call(["unzip", target_name, "-d", target_dir])
        except OSError as os_error:
            # the unzip executable is missing or cannot be started
            Logger.log(f"could not unpack dataset ({os_error})", "ERROR")
            return
        Logger.log("dataset installed", "INFO")
    elif response.status_code == 204:
        # NOTE(review): a 204 response normally carries no body - confirm the
        # server really sends a JSON payload with this status code
        error = response.json()["error"]
        Logger.log(f"did not get dataset ({error})", "ERROR")
def minima(self):
    """Return the minimum located at the centre of the domain.

    Logs that the surface has infinitely many minima, then evaluates the
    surface at 0.5 in each of the ``self.param_dim`` dimensions.

    Returns:
        list: one dict with the keys 'params' and 'value'.
    """
    Logger.log(
        'LinearFunnel has an infinite number of minima at 0.45 < x_i < 0.55, for each x_i in x',
        'INFO',
    )
    # minimum at the centre
    centre = [0.5 for _ in range(self.param_dim)]
    return [{'params': centre, 'value': self._run(centre)}]
def Surface(kind='Dejong', param_dim=2):
    """Convenience function to access surfaces via a slightly higher level interface.

    It returns a certain surface with default arguments by keyword.

    Args:
        kind (str or AbstractSurface): Keyword identifying one of the surfaces,
            resolved through ``import_surface``. Alternatively, an instance of
            an AbstractSurface subclass (returned unchanged) or a subclass of
            AbstractSurface (instantiated without arguments).
        param_dim (int): Number of dimensions passed to the surface. Surfaces
            that are only defined in 2 dimensions ignore it and a warning is
            logged if a value other than 2 is given.

    Returns:
        Surface: An instance of the chosen surface.
    """
    _validate_surface_kind(kind)
    # if a string is passed, then load the corresponding wrapper
    if isinstance(kind, str):
        surface = import_surface(kind)
        two_dim_only = (
            'Branin', 'Denali', 'Everest', 'K2', 'Kilimanjaro', 'Matterhorn',
            'MontBlanc',
        )
        if kind in two_dim_only:
            surface = surface()
            if param_dim != 2:
                message = f'Surface {kind} is only defined in 2 dimensions: setting `param_dim`=2'
                Logger.log(message, 'WARNING')
        else:
            surface = surface(param_dim=param_dim)
    # if an instance of a surface is passed, simply return the same instance
    elif isinstance(kind, AbstractSurface):
        surface = kind
    # if a custom class is passed, then that is the 'wrapper'
    elif issubclass(kind, AbstractSurface):
        surface = kind()
    return surface
def from_file(self, file_name):
    """Register a database file name and load the database if it exists.

    Args:
        file_name (str): name of the database file, handed to
            ``self._from_file_name``.

    Returns:
        None: in every case. An error is logged when ``self.db_exists`` is
        falsy after the file name has been registered.
    """
    self._from_file_name(file_name)
    if self.db_exists:
        self._load_db()
        return None
    Logger.log("Could not find database file {}".format(file_name), "ERROR")
    return None
def from_dict(self, info_dict, param_space=None):
    """Fill this object with the names and values of a dictionary.

    Args:
        info_dict (dict): dictionary with parameter names and values.
        param_space (ParameterSpace): if given, it replaces
            ``self.param_space`` and the values are added in the order of
            ``param_space.param_names``; an error is logged when the keys of
            `info_dict` differ from those names. If None (default), a new
            Parameter is added to ``self.param_space`` for every key of
            `info_dict`, which is accepted as is.

    Returns:
        The object itself.
    """
    if param_space is not None:
        expected_names = set(param_space.param_names)
        provided_names = set(info_dict.keys())
        if expected_names != provided_names:
            Logger.log(
                'The dictionary keys provided do not match those in the parameter space',
                'ERROR',
            )
        self.param_space = param_space
        for name in param_space.param_names:
            # add specific value for the parameter
            self.add(name, info_dict[name])
        return self

    for name, entry in info_dict.items():
        # define parameter of parameter space
        self.param_space.add(Parameter(name=name))
        # add specific value for the parameter
        self.add(name, entry)
    return self
def __call__(args):
    """Dispatch the parsed command line arguments of the download command.

    Args:
        args: parsed arguments exposing the attributes ``list`` and ``name``.
            ``list`` takes precedence: if it is True the datasets are listed,
            otherwise the dataset called ``name`` is fetched. An error is
            logged when neither is set.
    """
    if args.list is True:
        ParserDownload._list_datasets()
        return
    if args.name is None:
        Logger.log("could not parse command line arguments", "ERROR")
        return
    ParserDownload._get_dataset(args.name)
def test_init_wrong_type():
    """Loading a file that is not a valid database must log exactly two errors."""
    file_name = 'test.dat'
    with open(file_name, 'w') as content:
        content.write('olympus')
    try:
        Database().from_file(file_name)
        assert len(Logger.ERRORS) == 2
    finally:
        # clean up even when the assertion fails, so that neither the logged
        # errors nor the scratch file leak into other tests
        Logger.purge()
        os.remove(file_name)
def ask(self, return_as=None):
    """Suggest a new set of parameters.

    Args:
        return_as (string): data type for the returned parameters; the
            conversion is done by the method ``to_<return_as>`` of the
            parameter vector (e.g. dict, array). If that method is missing, an
            error is logged and the parameter vector is returned unconverted.

    Returns:
        ParameterVector: newly generated parameters, converted if requested.
    """
    self.num_generated += 1
    param_vector = self._ask()
    # check that the parameters suggested are within the bounds of our param_space
    self._validate_paramvector(param_vector)

    if return_as is None:
        return param_vector

    try:
        convert = getattr(param_vector, 'to_{}'.format(return_as))
        param_vector = convert()
    except AttributeError:
        Logger.log('could not return param_vector as "{}"'.format(return_as), 'ERROR')
    return param_vector
def __init__(self, *args, **kwargs):
    """Initialise the bookkeeping attributes shared by all planners.

    All arguments are forwarded to ``Object.__init__``. The keys 'goal',
    'init_guess' and 'random_seed' are then dropped from ``kwargs`` and the
    remainder is stored as ``self.config``. ``self.flip_measurements`` is
    derived from ``self.goal``; an error is logged if the goal is neither
    "minimize" nor "maximize".
    """
    Object.__init__(self, *args, **kwargs)
    self.num_generated = 0
    self.param_space = None
    self._params = None
    self._values = None
    self.SUBMITTED_PARAMS = []
    self.RECEIVED_VALUES = []

    # rm all those vars in config that are not needed/used by ScipyWrapper
    for unused in ('goal', 'init_guess', 'random_seed'):
        kwargs.pop(unused, None)
    self.config = Config(from_dict=kwargs)

    # self.goal is an abstract attribute that needs to be defined by the subclasses of AbstractPlanner
    # Since all planner wrappers are implemented in minimization mode, we flip the measurements if we
    # want to perform a maximization
    if self.goal == 'maximize':
        self.flip_measurements = True
    elif self.goal == 'minimize':
        self.flip_measurements = False
    else:
        Logger.log(
            f'attribute `goal` can only be "minimize" or "maximize". "{self.goal}" is not a valid value',
            'ERROR',
        )
def optimize(self, emulator, num_iter=1, verbose=False):
    """Optimizes a surface for a fixed number of iterations.

    Args:
        emulator (object): Emulator or a Surface instance to optimize over.
        num_iter (int): Maximum number of iterations allowed.
        verbose (bool): Whether to print information to screen.

    Returns:
        campaign (Campaign): Campaign object with information about the
            optimization, including all parameters tested and measurements
            obtained.
    """
    # update num_iter, then budget, if the specific wrapper defines them
    for attribute in ('num_iter', 'budget'):
        if hasattr(self, attribute) and getattr(self, attribute) != num_iter:
            Logger.log(
                f'Updating the number of sampling points of planner {type(self).__name__} to {num_iter}',
                'INFO',
            )
            setattr(self, attribute, num_iter)

    # reset planner if it has a 'reset' method. Assuming that if there is a 'reset' method it is needed here
    # This is used by Deap for example, to clear the latest population before doing another optimization
    reset = getattr(self, "reset", None)
    if callable(reset):
        reset()

    # use campaign to store info, and then to be returned
    campaign = Campaign()
    campaign.set_planner_specs(self)
    campaign.set_emulator_specs(emulator)

    # provide the planner with the parameter space.
    # param space in emulator as it originates from dataset
    self.set_param_space(emulator.param_space)

    # Optimize: i.e. call the planner recommend method for num_iter times
    for iteration in range(1, num_iter + 1):
        if verbose is True:
            Logger.log(f'Optimize iteration {iteration}', 'INFO')
            Logger.log('Obtaining parameters from planner...', 'INFO')
        # get new params from planner
        # NOTE: now we get 1 param at a time, a possible future expansion is to return batches
        suggestion = self.recommend(observations=campaign.observations)

        # get measurement from emulator/surface
        if verbose is True:
            Logger.log('Obtaining measurement from emulator...', 'INFO')
        measurement = emulator.run(suggestion.to_array(), return_paramvector=True)

        # store parameter and measurement pair in campaign
        campaign.add_observation(suggestion, measurement)

    return campaign
def test_name_collisions():
    """Adding a parameter whose name is already taken must log exactly one error."""
    param_space = ParameterSpace()
    # four distinct names followed by a repeat of the first one
    names = [f'param_{index}' for index in range(4)] + ['param_0']
    for name in names:
        param_space.add(Parameter(name=name))
    assert len(Logger.ERRORS) == 1
    Logger.purge()
def test_auto_init_pickle():
    """A pickle file must be recognised as a database of kind 'pkl'."""
    file_name = 'test.pickle'
    with open(file_name, 'wb') as content:
        pickle.dump({}, content)
    try:
        database = Database().from_file(file_name)
        assert database.db.kind == 'pkl'
    finally:
        # clean up even when the assertion fails, so that neither the scratch
        # file nor logged messages leak into other tests
        os.remove(file_name)
        Logger.purge()
def _add_param(self, param):
    """Append a parameter unless its name is already in use.

    Args:
        param: object with a ``name`` attribute. If the name is already listed
            in ``self.param_names`` an error is logged and the parameter is
            not added.
    """
    if param.name not in self.param_names:
        self.parameters.append(param)
        return
    # we already have that param
    message = '''Parameter "{}" is already defined'''.format(param.name)
    Logger.log(message, 'ERROR')
def _validate_paramvector(self, param_vector):
    """Warn about continuous parameters proposed outside of their bounds.

    Args:
        param_vector: object whose ``to_dict()`` maps parameter names to the
            proposed values. Only parameters of type 'continuous' are checked
            against the 'low' and 'high' entries of the parameter space.
    """
    for name, proposed in param_vector.to_dict().items():
        spec = self.param_space.get_param(name=name)
        if spec['type'] != 'continuous':
            continue
        if spec['low'] <= proposed <= spec['high']:
            continue
        message = 'Proposed parameter {0} not within defined bounds ({1},{2})'.format(
            proposed, spec['low'], spec['high'])
        Logger.log(message, 'WARNING')
def _list_datasets():
    """Log the datasets reported by the first connector that returns a list.

    Github is queried first and the server only as a backup. An error is
    logged if neither connector returns a list.
    """
    for connector_class in (ConnectorGithub, ConnectorServer):
        found = connector_class().list()
        if not isinstance(found, list):
            continue
        Logger.log(f"found datasets: {found}", "INFO")
        return
    Logger.log("could not retrieve list of datasets", "ERROR")
def get_campaigns(self, dataset):
    """Return the baseline campaigns stored for a dataset.

    The baseline database is loaded on demand when it is known in
    ``self.baseline_db_files`` but not yet present in ``self.baseline_dbs``.

    Args:
        dataset: key identifying the dataset.

    Returns:
        list: the campaigns of the dataset, or None (after logging an error)
        when no baseline database is known for it.
    """
    if dataset not in self.baseline_dbs.keys():
        if dataset not in self.baseline_db_files.keys():
            Logger.log(f"could not find baseline db for dataset: {dataset}", "ERROR")
            return None
        self._load_baseline_db(dataset)
    return list(self.baseline_dbs[dataset])
def _process_response(self, response):
    """Translate the status code of a response into a success flag.

    Args:
        response: object exposing a ``status_code`` attribute.

    Returns:
        bool: True for status 200. Otherwise False, after logging
        "could not reach server" for status 404 or "unknown error" for any
        other status.
    """
    if response.status_code == 200:
        return True
    reason = "could not reach server" if response.status_code == 404 else "unknown error"
    Logger.log(reason, "ERROR")
    return False
def _ask(self):
    """Return the next sample of the grid.

    The grid is created on first use. The sample is removed from the front of
    ``self.samples``; when it was the last one, an INFO message is logged.

    Returns:
        ParameterVector: the sample, bound to ``self.param_space``.
    """
    if self.grid_created is False:
        self._create_grid()
    next_sample = self.samples.pop(0)
    remaining = len(self.samples)
    if remaining == 0:
        Logger.log(
            'Last parameter being provided - there will not be any more available samples in the grid.',
            'INFO',
        )
    return ParameterVector(array=next_sample, param_space=self.param_space)
def maxima(self):
    """Return some of the maxima of the surface.

    Logs that the surface has infinitely many maxima, then evaluates the
    surface at every combination of 0 and 1 over ``self.param_dim``
    dimensions.

    Returns:
        list: one dict with the keys 'params' and 'value' per combination.
    """
    Logger.log('DiscreteMichalewicz has an infinite number of maxima', 'INFO')
    # some maxima
    corners = (list(corner) for corner in product([0, 1], repeat=self.param_dim))
    return [{'params': corner, 'value': self._run(corner)} for corner in corners]
def check_module(module_name, message, **kwargs):
    """Log an error if the module ``module_name`` cannot be imported.

    Args:
        module_name (str): name handed to ``__import__``.
        message (str): ``str.format`` template of the error message. It is
            formatted with ``kwargs`` merged with the local variables of this
            function, so it may reference e.g. ``{module_name}`` or
            ``{module}``.
        **kwargs: additional values made available to the template.
    """
    try:
        _ = __import__(module_name)
    except ModuleNotFoundError:
        # imported only when needed - presumably to avoid a circular import;
        # TODO confirm
        from olympus import Logger
        error = traceback.format_exc()
        # recover the name of the missing module from the traceback text: it
        # is the last quoted token on the line mentioning ModuleNotFoundError
        for line in error.split('\n'):
            if 'ModuleNotFoundError' in line:
                module = line.strip().strip("'").split("'")[-1]
        # expose every local variable (module, module_name, error, ...) to the
        # template; locals override kwargs of the same name. Renaming a local
        # variable here therefore changes which placeholders are available.
        kwargs.update(locals())
        message = f'{message}'.format(**kwargs)
        Logger.log(message, 'ERROR')
def list(self):
    """List the datasets available on the server.

    Posts an empty form to ``{self.URL}/list_datasets``.

    Returns:
        list or bool: the sorted dataset names on a 200 response; otherwise
        the result of ``self._process_response(response)``.
    """
    Logger.log("connecting to server", "INFO")
    url = f"{self.URL}/list_datasets"
    response = requests.post(url, data={})
    if response.status_code == 200:
        return sorted(response.json()["datasets"])
    return self._process_response(response)
def _validate_args(transformations):
    """Check that the requested transformations can be used.

    Args:
        transformations (list): names of the transformations. Each one needs
            both a ``_forward_<name>`` and a ``_backward_<name>`` attribute on
            DataTransformer.

    Raises:
        NotImplementedError: if a transformation lacks one of the two
            attributes.

    An error is logged (nothing is raised) if 'periodic' is requested but is
    not the first transformation.
    """
    # check validity of transformation argument
    for name in transformations:
        if hasattr(DataTransformer, f'_forward_{name}') and hasattr(DataTransformer, f'_backward_{name}'):
            continue
        raise NotImplementedError(
            f'transformation {name} not implemented. Please select one of the '
            f'available transformation.')

    if 'periodic' not in transformations:
        return
    if transformations.index('periodic') != 0:
        Logger.log('periodic transform is allowed only as the first transformation', 'ERROR')
def __init__(self, planner, emulator=None, surface=None, campaign=..., database=None):
    """The Evaluator does higher level operations that Planners and Emulators do not do on their own.

    For instance, communicating parameters and measurements to each other,
    keeping track of them ensuring they match, and storing these in a Campaign
    object. All this can also be done by the user using planner, emulator and
    campaign objects, which might allow more customization. However, Evaluator
    provides a convenient higher-level interface for common optimization
    tasks.

    Args:
        planner (Planner): an instance of a Planner.
        emulator (Emulator): an instance of a trained Emulator. Mutually
            exclusive with `surface`.
        surface (Surface): an instance of a Surface. Mutually exclusive with
            `emulator`.
        campaign (Campaign): an instance of a Campaign. By default (the
            Ellipsis sentinel), a new Campaign instance is created for this
            Evaluator. If this is set to None, no campaign info will be
            stored.
        database (object): ...
    """
    # Ellipsis marks "not provided": None is a meaningful value, and a
    # `Campaign()` default would be created once and shared by all Evaluators
    if campaign is ...:
        campaign = Campaign()

    # NOTE: locals() has to be taken before any additional local variable is
    # created, since every local is forwarded to Object.__init__
    Object.__init__(**locals())

    if emulator is not None:
        assert surface is None
        self.emulator_type = 'numerical'
    elif surface is not None:
        self.emulator_type = 'analytic'
        self.emulator = surface
    else:
        Logger.log('One of emulator or surface needs to be provided', 'FATAL')

    # provide the planner with the parameter space.
    # NOTE: right now, outside of Evaluator, the param_space for a planner
    # needs to be set "manually" by the user
    # param space in emulator as it originates from dataset
    self.planner.set_param_space(self.emulator.param_space)

    if self.campaign is not None:
        self.campaign.set_planner_specs(planner)
        # self.emulator rather than `emulator`: the latter is None when a
        # surface has been provided
        self.campaign.set_emulator_specs(self.emulator)
def _validate_dataset_args(kind, data, columns, target_names):
    """Validate the arguments used to create a Dataset.

    Nothing is checked when `kind` is None. Otherwise `kind` must match one of
    the ``dataset_*`` folders located next to this module (a FATAL message is
    logged if it does not), and a warning is logged for each of `data`,
    `columns` and `target_names` that is not None, since they are discarded.

    Args:
        kind (str): name of an Olympus dataset, or None.
        data: custom data, expected to be None when `kind` is given.
        columns: custom column names, expected to be None when `kind` is given.
        target_names: custom target names, expected to be None when `kind` is
            given.
    """
    if kind is None:
        return

    # -----------------------------------
    # check that a correct name is passed
    # -----------------------------------
    # TODO: reduce redundant code by importing the list from where we have it already
    module_path = os.path.dirname(os.path.abspath(__file__))
    prefix = "dataset_"
    # basename instead of splitting on "/" so that Windows paths work too
    olympus_datasets = [
        os.path.basename(dir_name)[len(prefix):]
        for dir_name in glob(os.path.join(module_path, f"{prefix}*"))
    ]
    if kind not in olympus_datasets:
        message = (
            "Could not find dataset `{0}`. Please choose from one of the available "
            "datasets: {1}.".format(kind, ", ".join(olympus_datasets)))
        Logger.log(message, "FATAL")

    # --------------------------------------------------------------
    # we will discard these arguments, so check if they are provided
    # --------------------------------------------------------------
    discarded = (("data", data), ("columns", columns), ("target_names", target_names))
    for arg_name, arg_value in discarded:
        if arg_value is not None:
            message = (
                "One of the Olympus datasets has been loaded via the argument `kind`, "
                f"argument `{arg_name}` will be discarded")
            Logger.log(message, "WARNING")
def run(self, params, return_paramvector=False):
    """Evaluate the surface at the chosen location.

    Args:
        params (array): Set of input parameters for which to return the
            function value. A number, a list, a tuple or a numpy array; a
            single parameter set (1d) is treated as a batch of size one.
        return_paramvector (bool): Whether to return ``ParameterVector``
            objects instead of a list of lists. Default is False.

    Returns:
        list: function values evaluated at the chosen locations, one entry per
        parameter set. Each entry is a one-element list, or a
        ``ParameterVector`` if `return_paramvector` is True.
    """
    if isinstance(params, (float, int)):
        params = np.array([params])
    elif isinstance(params, (list, tuple)):
        # tuples are accepted too; they used to fail on `.shape` below
        params = np.array(params)
    if len(params.shape) == 1:
        params = np.expand_dims(params, axis=0)

    # TODO: these validations could be moved to ParameterSpace class
    # validate params
    if params.shape[1] != len(self.param_space):
        message = (f'Dimensions of provided params ({params.shape[1]}) does not match expected '
                   f'dimension ({len(self.param_space)})')
        Logger.log(message, 'ERROR')

    # this raises warnings for out-of-bounds parameters
    for param_set in params:
        self.param_space.validate(param_set)

    # get values from the surface class
    y_preds = [[self._run(param_set)] for param_set in params]  # 2d array

    # if we are not asking for a ParamVector, we can just return y_preds
    if return_paramvector is False:
        return y_preds

    # return a ParameterVector
    # NOTE: while we do not allow batches or multiple objectives yet, this code is supposed to be able to
    # support those
    y_pred_objects = []  # list of ParamVectors with all samples and objectives
    # iterate over all samples (if we returned a batch of predictions)
    for y_pred in y_preds:
        y_pred_object = ParameterVector()
        # iterate over all objectives/targets
        for target_name, y in zip(['target_0'], y_pred):
            y_pred_object.from_dict({target_name: y})
        # append object to list
        y_pred_objects.append(y_pred_object)

    return y_pred_objects
def _validate_planner_kind(kind):
    """Check that `kind` identifies a usable planner.

    A FATAL message is logged when `kind` is a string that does not map to an
    available planner, when it is an AbstractPlanner instance or subclass
    lacking one of the methods ``_set_param_space``, ``_tell`` or ``_ask``, or
    when it is anything else.

    Args:
        kind (str or AbstractPlanner): planner name, planner instance or
            planner class.
    """
    # if we received a string
    if isinstance(kind, str):
        from . import PlannerLoader
        kind = PlannerLoader.file_to_class(kind)
        avail_planners = get_planners_list()
        if kind not in avail_planners:
            message = ('Planner "{0}" not available in Olympus. Please choose '
                       "from one of the available planners: {1}".format(
                           kind, ", ".join(avail_planners)))
            Logger.log(message, "FATAL")

    # if we get an instance of a planner class or a custom planner class.
    # `isinstance(kind, type)` guards issubclass, which raises TypeError for
    # objects that are not classes and would keep the final branch from running
    elif isinstance(kind, AbstractPlanner) or (
            isinstance(kind, type) and issubclass(kind, AbstractPlanner)):
        # make sure it has the necessary methods
        for method in ["_set_param_space", "_tell", "_ask"]:
            implementation = getattr(kind, method, None)
            if not callable(implementation):
                message = f'The object {kind} does not implement the necessary method "{method}"'
                Logger.log(message, "FATAL")

    # if we do not know what was passed raise an error
    else:
        message = 'Could not initialize Planner: the argument "kind" is neither a string or AbstractPlanner subclass'
        Logger.log(message, "FATAL")
def _create_optimizer(self):
    """Draw the latin hypercube samples this planner will hand out.

    If ``self.budget`` is None a warning is logged and the budget falls back
    to the number of parameters. The samples returned by ``pyDOE.lhs`` are
    rescaled column by column to the [low, high] range of the corresponding
    parameter and stored as a list in ``self.samples``.
    """
    from pyDOE import lhs

    if self.budget is None:
        Logger.log(
            f'Please provide a number of samples for this planner. Given no number of samples provided, '
            f'falling back to setting `budget` to {len(self.param_space)}',
            'WARNING',
        )
        self.budget = len(self.param_space)

    self.samples = lhs(len(self.param_space), samples=self.budget)
    grid = self.samples
    # rescale every column to the range of its parameter (in place)
    for column, parameter in enumerate(self.param_space):
        span = parameter.high - parameter.low
        grid[:, column] = span * grid[:, column] + parameter.low
    self.samples = list(grid)
    self.has_optimizer = True
def _validate_noise_kind(kind):
    """Check that `kind` identifies a usable noise object.

    A FATAL message is logged when `kind` is a string that is not among the
    available noises, when it is an AbstractNoise instance or subclass lacking
    the method ``_add_noise``, or when it is anything else.

    Args:
        kind (str or AbstractNoise): noise name, noise instance or noise class.
    """
    # if we received a string
    if isinstance(kind, str):
        avail_noises = get_noises_list()
        if kind not in avail_noises:
            message = ('Noise "{0}" not available in Olympus. Please choose '
                       'from one of the available noise objects: {1}'.format(kind, ', '.join(avail_noises)))
            Logger.log(message, 'FATAL')

    # if we get an instance of a noise class or a custom noise class.
    # `isinstance(kind, type)` guards issubclass, which raises TypeError for
    # objects that are not classes and would keep the final branch from running
    elif isinstance(kind, AbstractNoise) or (
            isinstance(kind, type) and issubclass(kind, AbstractNoise)):
        # make sure it has the necessary methods
        for method in ['_add_noise']:
            implementation = getattr(kind, method, None)
            if not callable(implementation):
                message = f'The object {kind} does not implement the necessary method "{method}"'
                Logger.log(message, 'FATAL')

    # if we do not know what was passed raise an error
    else:
        message = 'Could not initialize Noise: the argument "kind" is neither a string or AbstractNoise subclass'
        Logger.log(message, 'FATAL')
def list(self):
    """List the datasets found in the remote repository.

    Runs ``svn ls -R`` on ``self.URL`` and collects the distinct top-level
    entries of the listing.

    Returns:
        list: sorted names, each with its first 8 characters removed. The list
        is empty if svn produced no output or could not be started.
    """
    Logger.log("connecting to github", "INFO")
    try:
        # argument list and captured stdout: no shell interpretation of the
        # URL and no scratch file in the working directory
        listing = subprocess.run(
            ["svn", "ls", "-R", self.URL],
            stdout=subprocess.PIPE,
            universal_newlines=True,
        ).stdout
    except OSError:
        # svn is missing or cannot be started: behave as for an empty listing,
        # which is what the shell based call resulted in
        listing = ""
    top_level_names = {line.split("/")[0] for line in listing.splitlines() if line}
    # NOTE(review): the 8 characters dropped are presumably the "dataset_"
    # prefix of the folder names - confirm against the repository layout
    return sorted(name[8:] for name in top_level_names)
def train(self, data):
    """Computes the statistics (e.g. mean and standard deviation) needed for the chosen transformation.

    With the exception of the 'identity' transform, the DataTransformer always
    needs to be trained before the `transform` and `back_transform` methods
    can be used.

    Args:
        data (array, Dataset): the data used to compute the statistics needed
            for the transformation. This can be a 2d numpy array, or a Dataset
            object. A Dataset is required when the 'periodic' transformation
            is used (an error is logged otherwise).
    """
    self._dims = None  # reset _dims if we retrain the DataTransformer

    if 'periodic' in self.transformations:
        # for splitting periodic variables we need a dataset, so that we can check which variables are
        # periodic and what are their lower/upper bounds
        if not isinstance(data, Dataset):
            Logger.log(
                'in order to transform periodic variables you need to provide a Dataset object as the data argument',
                'ERROR',
            )
        # remember the input dimensions
        self._dims = np.shape(data.data)[1]
        # extract the info about periodic variables
        self._parse_dataset_for_periodic(data)
        # Now swap dataset for data after periodic transform. This is done just in case the periodic
        # transform is composed with other transformations that will then require operating on a higher
        # dimensional array: the means, stddev etc. statistics will need to have matching dimensions
        data = self._forward_periodic(data.data.to_numpy())
    else:
        # allow passing a dataset
        if isinstance(data, Dataset):
            data = data.data.to_numpy()
        self._validate_data(data)
        # remember the input dimensions
        self._dims = np.shape(data)[1]

    # ------------------------
    # Get stats about the data
    # ------------------------
    samples = np.array(data)
    statistics = (
        ('_mean', np.mean),
        ('_stddev', np.std),
        ('_min', np.amin),
        ('_max', np.amax),
    )
    for attribute, reduce_columns in statistics:
        setattr(self, attribute, reduce_columns(samples, axis=0))

    self.trained = True