Ejemplo n.º 1
0
    def _download_dataset(self, dataset_name):
        """Download a dataset archive from the server and unpack it locally.

        Posts ``dataset_name`` to ``{self.URL}/get_dataset``. On a 200 response
        the body is written to ``dataset.zip`` inside
        ``{__home__}/datasets/dataset_{dataset_name}`` and extracted into that
        same directory with the external ``unzip`` program. On a 204 response
        the ``error`` field of the JSON body is logged as an error. Any other
        status code is silently ignored.

        Args:
            dataset_name (str): name of the dataset to download.
        """
        Logger.log(f"downloading dataset {dataset_name}", "INFO")
        url = f"{self.URL}/get_dataset"
        data = {"dataset_name": dataset_name}
        response = requests.post(url, data=data)
        if response.status_code == 200:

            target_dir = f"{__home__}/datasets/dataset_{dataset_name}"
            target_name = f"{target_dir}/dataset.zip"

            # an already existing directory is fine; any other failure
            # (e.g. missing permissions) is reported here instead of being
            # swallowed and resurfacing later as a confusing open() error
            os.makedirs(target_dir, exist_ok=True)

            # save file
            Logger.log("saving dataset", "INFO")
            with open(target_name, "wb") as content:
                content.write(response.content)

            # unzip file
            Logger.log("unpacking dataset", "INFO")
            # argument list instead of a shell string: paths containing
            # spaces or shell metacharacters are passed through verbatim
            subprocess.call(["unzip", target_name, "-d", target_dir])
            Logger.log("dataset installed", "INFO")

        elif response.status_code == 204:
            error = response.json()["error"]
            Logger.log(f"did not get dataset ({error})", "ERROR")
 def minima(self):
     """Report one representative minimum of the surface.

     Logs an INFO note about the minima forming a continuous region and
     then evaluates the surface at 0.5 in every dimension.

     Returns:
         list: a single-element list with a dict holding the keys
             'params' and 'value'.
     """
     Logger.log(
         'LinearFunnel has an infinite number of minima at 0.45 < x_i < 0.55, for each x_i in x',
         'INFO')
     # minimum at the centre
     centre = [0.5 for _ in range(self.param_dim)]
     centre_value = self._run(centre)
     return [{'params': centre, 'value': centre_value}]
Ejemplo n.º 3
0
def Surface(kind='Dejong', param_dim=2):
    """Convenience function to access surfaces via a slightly higher level interface.

    Args:
        kind (str or AbstractSurface): Keyword identifying the surface to load, an instance of
            ``AbstractSurface`` (returned unchanged), or a subclass of ``AbstractSurface``
            (instantiated without arguments).
        param_dim (int): Number of dimensions forwarded to the surface constructor when ``kind``
            is a keyword. It is ignored, with a warning if it differs from 2, for the surfaces
            that are only defined in 2 dimensions.

    Returns:
        AbstractSurface: An instance of the chosen surface.

    Raises:
        TypeError: if ``kind`` is none of the supported types.
    """
    _validate_surface_kind(kind)

    # surfaces whose constructor does not take `param_dim`
    only_2d = ('Branin', 'Denali', 'Everest', 'K2', 'Kilimanjaro',
               'Matterhorn', 'MontBlanc')

    # if a string is passed, then load the corresponding wrapper
    if isinstance(kind, str):
        surface_class = import_surface(kind)
        if kind not in only_2d:
            return surface_class(param_dim=param_dim)
        surface = surface_class()
        if param_dim != 2:
            message = f'Surface {kind} is only defined in 2 dimensions: setting `param_dim`=2'
            Logger.log(message, 'WARNING')
        return surface

    # if an instance of a surface is passed, simply return the same instance
    if isinstance(kind, AbstractSurface):
        return kind

    # if a custom class is passed, then that is the 'wrapper'
    # (issubclass() itself raises for non-class arguments, hence the guard)
    if isinstance(kind, type) and issubclass(kind, AbstractSurface):
        return kind()

    # previously this fell through to an unbound local or an opaque issubclass() error
    raise TypeError(
        'argument `kind` must be a string, an AbstractSurface instance or an AbstractSurface subclass'
    )
 def from_file(self, file_name):
     """Point this object at a database file and load it.

     Args:
         file_name (str): path of the database file to load.

     Returns:
         The object itself once the database has been loaded, or None
         when the file could not be found (an error is logged in that
         case).
     """
     self._from_file_name(file_name)
     if not self.db_exists:
         Logger.log("Could not find database file {}".format(file_name),
                    "ERROR")
         return None
     self._load_db()
     # hand the instance back so that `db = Cls().from_file(path)` yields
     # a usable object instead of None
     return self
	def from_dict(self, info_dict, param_space=None):
		"""Populate this object from a mapping of parameter names to values.

		Args:
			info_dict (dict): parameter names mapped to their values.
			param_space (ParameterSpace): optional parameter space to adopt.
				When given, the keys of `info_dict` are compared with its
				parameter names (a mismatch is logged as an error) and the
				values are added in the order of the parameter space. When
				omitted, a new `Parameter` is registered for every key of
				`info_dict`. Default is None.

		Returns:
			The object itself.
		"""
		if param_space is not None:
			expected_names = set(param_space.param_names)
			provided_names = set(info_dict.keys())
			if expected_names != provided_names:
				Logger.log(
					'The dictionary keys provided do not match those in the parameter space',
					'ERROR')
			self.param_space = param_space
			for name in param_space.param_names:
				# add specific value for the parameter
				self.add(name, info_dict[name])
			return self

		for name, value in info_dict.items():
			# define parameter of parameter space
			self.param_space.add(Parameter(name=name))
			# add specific value for the parameter
			self.add(name, value)
		return self
Ejemplo n.º 6
0
 def __call__(args):
     """Dispatch the parsed command line arguments of the download parser.

     Args:
         args: parsed arguments; the attributes `list` and `name` are read.
             If `list` is True the datasets are listed, otherwise the
             dataset called `name` is fetched. An error is logged when
             neither is provided.
     """
     if args.list is True:
         ParserDownload._list_datasets()
         return
     if args.name is None:
         Logger.log("could not parse command line arguments", "ERROR")
         return
     ParserDownload._get_dataset(args.name)
Ejemplo n.º 7
0
def test_init_wrong_type():
    """Loading a plain-text 'test.dat' file is expected to leave exactly two logged errors."""
    file_name = 'test.dat'
    with open(file_name, 'w') as content:
        content.write('olympus')
    try:
        Database().from_file(file_name)
        assert len(Logger.ERRORS) == 2
    finally:
        # always reset the logger and remove the file, so that a failing
        # assertion does not leak state into the tests that run afterwards
        Logger.purge()
        os.remove(file_name)
Ejemplo n.º 8
0
    def ask(self, return_as=None):
        """Suggest a new set of parameters.

        Args:
            return_as (string): optional output format. When given, the
                suggestion is converted by calling its `to_<return_as>`
                method (documented options: dict, array). If no such
                method exists an error is logged and the unconverted
                suggestion is returned.

        Returns:
            ParameterVector: newly generated parameters, or their converted
                representation when `return_as` is given.
        """
        self.num_generated += 1
        suggestion = self._ask()

        # check that the parameters suggested are within the bounds of our param_space
        self._validate_paramvector(suggestion)

        if return_as is None:
            return suggestion

        try:
            converter_name = 'to_{}'.format(return_as)
            suggestion = getattr(suggestion, converter_name)()
        except AttributeError:
            Logger.log(
                'could not return param_vector as "{}"'.format(return_as),
                'ERROR')
        return suggestion
Ejemplo n.º 9
0
    def __init__(self, *args, **kwargs):
        """Set up the bookkeeping attributes, the config and the measurement flipping flag.

        All positional and keyword arguments are first forwarded to
        `Object.__init__`. The keyword arguments, minus 'goal', 'init_guess'
        and 'random_seed', are then stored in `self.config`.
        """
        Object.__init__(self, *args, **kwargs)

        # counters and containers start out empty
        self.num_generated = 0
        self.SUBMITTED_PARAMS = []
        self.RECEIVED_VALUES = []
        self.param_space = None
        self._params = None
        self._values = None

        # rm all those vars in config that are not needed/used by ScipyWrapper
        for unused in ('goal', 'init_guess', 'random_seed'):
            kwargs.pop(unused, None)
        self.config = Config(from_dict=kwargs)

        # self.goal is an abstract attribute that needs to be defined by the subclasses of AbstractPlanner
        # Since all planner wrappers are implemented in minimization mode, we flip the measurements if we want to
        # perform a maximization
        if self.goal not in ('minimize', 'maximize'):
            Logger.log(
                f'attribute `goal` can only be "minimize" or "maximize". "{self.goal}" is not a valid value',
                'ERROR')
        else:
            self.flip_measurements = self.goal == 'maximize'
Ejemplo n.º 10
0
    def optimize(self, emulator, num_iter=1, verbose=False):
        """Run a complete optimization loop against an emulator or a surface.

        Args:
            emulator (object): Emulator or a Surface instance to optimize over.
            num_iter (int): Number of iterations to run.
            verbose (bool): Whether to log progress information. Only the value True enables it.

        Returns:
            campaign (Campaign): Campaign object with information about the optimization, including all parameters
                tested and measurements obtained.
        """
        # some wrappers carry their own `num_iter` and/or `budget`: keep them in sync with the requested value
        for attr_name in ('num_iter', 'budget'):
            if hasattr(self, attr_name) and getattr(self, attr_name) != num_iter:
                Logger.log(
                    f'Updating the number of sampling points of planner {type(self).__name__} to {num_iter}',
                    'INFO')
                setattr(self, attr_name, num_iter)

        # reset planner if it has a 'reset' method. Assuming that if there is a 'reset' method it is needed here
        # This is used by Deap for example, to clear the latest population before doing another optimization
        reset = getattr(self, "reset", None)
        if callable(reset):
            reset()

        # the campaign collects all information and is what gets returned
        campaign = Campaign()
        campaign.set_planner_specs(self)
        campaign.set_emulator_specs(emulator)

        # provide the planner with the parameter space.
        # param space in emulator as it originates from dataset
        self.set_param_space(emulator.param_space)

        chatty = verbose is True
        for iteration in range(num_iter):
            if chatty:
                Logger.log(f'Optimize iteration {iteration+1}', 'INFO')
                Logger.log('Obtaining parameters from planner...', 'INFO')
            # NOTE: now we get 1 param at a time, a possible future expansion is to return batches
            suggestion = self.recommend(observations=campaign.observations)

            if chatty:
                Logger.log('Obtaining measurement from emulator...', 'INFO')
            measurement = emulator.run(suggestion.to_array(),
                                       return_paramvector=True)

            # store parameter and measurement pair in campaign
            campaign.add_observation(suggestion, measurement)

        return campaign
Ejemplo n.º 11
0
def test_name_collisions():
    """Adding a parameter whose name is already taken is expected to log exactly one error."""
    param_space = ParameterSpace()
    for index in range(4):
        param_space.add(Parameter(name=f'param_{index}'))
    # 'param_0' was already registered by the loop above
    duplicate = Parameter(name='param_0')
    param_space.add(duplicate)
    assert len(Logger.ERRORS) == 1
    Logger.purge()
Ejemplo n.º 12
0
def test_auto_init_pickle():
    """A 'test.pickle' file holding an empty dict is expected to load as a database of kind 'pkl'."""
    file_name = 'test.pickle'
    with open(file_name, 'wb') as content:
        pickle.dump({}, content)
    try:
        database = Database().from_file(file_name)
        assert database.db.kind == 'pkl'
    finally:
        # clean up even when loading or the assertion fails, so that no file
        # or logger state is left behind for the tests that run afterwards
        os.remove(file_name)
        Logger.purge()
Ejemplo n.º 13
0
 def _add_param(self, param):
     # check if we already have that param
     if param.name in self.param_names:
         message = '''Parameter "{}" is already defined'''.format(
             param.name)
         Logger.log(message, 'ERROR')
     else:
         self.parameters.append(param)
Ejemplo n.º 14
0
 def _validate_paramvector(self, param_vector):
     for key, value in param_vector.to_dict().items():
         param = self.param_space.get_param(name=key)
         if param['type'] == 'continuous':
             if not param['low'] <= value <= param['high']:
                 message = 'Proposed parameter {0} not within defined bounds ({1},{2})'.format(
                     value, param['low'], param['high'])
                 Logger.log(message, 'WARNING')
Ejemplo n.º 15
0
 def _list_datasets():
     """Log the datasets reported by the first connector that returns a list.

     The connectors are tried in order; an error is logged when none of
     them returns a list.
     """
     # check github first and the server only as a backup
     connectors = (ConnectorGithub, ConnectorServer)
     for connector_class in connectors:
         found = connector_class().list()
         if not isinstance(found, list):
             continue
         Logger.log(f"found datasets: {found}", "INFO")
         return
     Logger.log("could not retrieve list of datasets", "ERROR")
Ejemplo n.º 16
0
 def get_campaigns(self, dataset):
     """Return the baseline campaigns of a dataset as a list.

     The campaigns are taken from `self.baseline_dbs`; if the dataset is
     not there yet but is listed in `self.baseline_db_files`, it is loaded
     first via `self._load_baseline_db`. When the dataset is unknown an
     error is logged and None is returned.
     """
     if dataset not in self.baseline_dbs.keys():
         if dataset not in self.baseline_db_files.keys():
             Logger.log(f"could not find baseline db for dataset: {dataset}", "ERROR")
             return None
         # lazily load the baseline database on first request
         self._load_baseline_db(dataset)
     return list(self.baseline_dbs[dataset])
Ejemplo n.º 17
0
 def _process_response(self, response):
     if response.status_code == 200:
         return True
     elif response.status_code == 404:
         Logger.log("could not reach server", "ERROR")
         return False
     else:
         Logger.log("unknown error", "ERROR")
         return False
Ejemplo n.º 18
0
    def _ask(self):
        """Hand out the next sample of the grid as a ParameterVector.

        The grid is created on first use. The sample is removed from the
        front of `self.samples`; an INFO message is logged when it was the
        last one available.
        """
        grid_missing = self.grid_created is False
        if grid_missing:
            self._create_grid()

        next_sample = self.samples.pop(0)
        remaining = len(self.samples)
        if remaining == 0:
            Logger.log(
                'Last parameter being provided - there will not be any more available samples in the grid.',
                'INFO')

        return ParameterVector(array=next_sample, param_space=self.param_space)
 def maxima(self):
     """Evaluate the surface at every combination of 0 and 1 across all dimensions.

     Returns:
         list: one dict per combination, with the keys 'params' (list of
             0/1 values) and 'value' (result of `self._run`).
     """
     Logger.log('DiscreteMichalewicz has an infinite number of maxima', 'INFO')
     # some maxima
     corners = (list(corner)
                for corner in product([0, 1], repeat=self.param_dim))
     return [{'params': corner, 'value': self._run(corner)}
             for corner in corners]
Ejemplo n.º 20
0
def check_module(module_name, message, **kwargs):
    """Try to import a module and log an error if it cannot be found.

    Args:
        module_name (str): name of the module to import.
        message (str): `str.format` template for the error message. It is
            formatted with `kwargs` merged with the local variables of this
            function (e.g. `module_name`, `error`, `module`).
        **kwargs: additional values made available to the template. Entries
            whose name matches a local variable are overridden by the local.
    """
    try:
        _ = __import__(module_name)
    except ModuleNotFoundError:
        # imported at call time rather than at module level
        # NOTE(review): presumably to avoid a circular import -- confirm
        from olympus import Logger
        error = traceback.format_exc()
        for line in error.split('\n'):
            if 'ModuleNotFoundError' in line:
                # keep what follows the last single quote left after stripping,
                # i.e. the quoted name at the end of the exception line
                module = line.strip().strip("'").split("'")[-1]
        # expose every local name to the template; the exact set of locals is
        # therefore part of this function's behaviour
        kwargs.update(locals())
        message = f'{message}'.format(**kwargs)
        Logger.log(message, 'ERROR')
Ejemplo n.º 21
0
    def list(self):
        """Ask the server for the names of the available datasets.

        Posts an empty payload to ``{self.URL}/list_datasets``.

        Returns:
            list: the sorted dataset names when the server answers with
                status 200. For any other status, the result of
                ``self._process_response(response)`` is returned instead.
        """
        Logger.log("connecting to server", "INFO")
        url = f"{self.URL}/list_datasets"
        response = requests.post(url, data={})
        if response.status_code != 200:
            return self._process_response(response)
        return sorted(response.json()["datasets"])
Ejemplo n.º 22
0
    def _validate_args(transformations):
        # check validity of transformation argument
        for transformation in transformations:
            if not (hasattr(DataTransformer, f'_forward_{transformation}') and
                    hasattr(DataTransformer, f'_backward_{transformation}')):
                raise NotImplementedError(
                    f'transformation {transformation} not implemented. Please select one of the '
                    f'available transformation.')

        if 'periodic' in transformations and transformations.index(
                'periodic') != 0:
            message = 'periodic transform is allowed only as the first transformation'
            Logger.log(message, 'ERROR')
Ejemplo n.º 23
0
    def __init__(self,
                 planner,
                 emulator=None,
                 surface=None,
                 campaign=...,
                 database=None):
        """ The Evaluator does higher level operations that Planners and
        Emulators do not do on their own. For instance, communicating parameters
        and measurements to each other, keeping track of them ensuring they
        match, and storing these in a Campaign object. All this can also be done
        by the user using planner, emulator and campaign objects, which might
        allow more customization. However, Evaluator provides a convenient
        higher-level interface for common optimization tasks.

        Args:
            planner (Planner): an instance of a Planner.
            emulator (Emulator): an instance of a trained Emulator. Mutually
                exclusive with `surface`.
            surface (Surface): an instance of a Surface. Mutually exclusive
                with `emulator`.
            campaign (Campaign): an instance of a Campaign. When the argument
                is omitted, a new Campaign instance is created for this
                Evaluator. If this is set to None, no campaign info will be
                stored.
            database (object): ...
        """
        # `...` marks "argument not provided": None already means "store no
        # campaign info", and a `Campaign()` default in the signature would be
        # built once at definition time and shared by every Evaluator.
        # This has to happen before `locals()` is captured below and must not
        # introduce any additional local name.
        if campaign is ...:
            campaign = Campaign()

        # forwards self and all arguments by keyword
        # NOTE(review): the attributes read below (self.planner, self.emulator,
        # self.campaign) are presumably set by this call -- confirm
        Object.__init__(**locals())

        if emulator is not None:
            assert surface is None
            self.emulator_type = 'numerical'
        elif surface is not None:
            assert emulator is None
            self.emulator_type = 'analytic'
            self.emulator = surface
        else:
            Logger.log('One of emulator or surface needs to be provided',
                       'FATAL')

        # provide the planner with the parameter space.
        # NOTE: right now, outside of Evaluator, the param_space for a planner
        #       needs to be set "manually" by the user
        self.planner.set_param_space(
            self.emulator.param_space
        )  # param space in emulator as it originates from dataset

        if self.campaign is not None:
            self.campaign.set_planner_specs(planner)
            # NOTE(review): `emulator` is None here when a surface was given;
            # confirm whether `self.emulator` was intended
            self.campaign.set_emulator_specs(emulator)
Ejemplo n.º 24
0
def _validate_dataset_args(kind, data, columns, target_names):
    if kind is not None:
        # -----------------------------------
        # check that a correct name is passed
        # -----------------------------------
        # TODO: reduce redundant code by importing the list from where we have it already
        module_path = os.path.dirname(os.path.abspath(__file__))
        olympus_datasets = []
        for dir_name in glob(f"{module_path}/dataset_*"):
            dir_name = dir_name.split("/")[-1][8:]
            olympus_datasets.append(dir_name)
        if kind not in olympus_datasets:
            message = (
                "Could not find dataset `{0}`. Please choose from one of the available "
                "datasets: {1}.".format(kind,
                                        ", ".join(list(olympus_datasets))))
            Logger.log(message, "FATAL")
        # --------------------------------------------------------------
        # we will discard these arguments, so check if they are provided
        # --------------------------------------------------------------
        if data is not None:
            message = (
                "One of the Olympus datasets has been loaded via the argument `kind`, argument `data` "
                "will be discarded")
            Logger.log(message, "WARNING")
        if columns is not None:
            message = (
                "One of the Olympus datasets has been loaded via the argument `kind`, argument `columns` "
                "will be discarded")
            Logger.log(message, "WARNING")
        if target_names is not None:
            message = (
                "One of the Olympus datasets has been loaded via the argument `kind`, argument "
                "`target_names` will be discarded")
            Logger.log(message, "WARNING")
Ejemplo n.º 25
0
    def run(self, params, return_paramvector=False):
        """Evaluate the surface at the chosen location(s).

        Args:
            params (array): Input parameters for which to return the function value. A single number, a flat
                sequence (one sample) or a 2d array (one sample per row).
            return_paramvector (bool): Whether to return ``ParameterVector`` objects instead of a list of lists.
                Default is False.

        Returns:
            values (list): one entry per sample. Each entry is a one-element list with the function value, or a
                ``ParameterVector`` with the key 'target_0' if ``return_paramvector`` is not False.
        """
        # bring the input into the shape (samples, dimensions)
        if isinstance(params, (float, int)):
            params = np.array([params])
        elif type(params) is list:
            params = np.array(params)
        if len(params.shape) == 1:
            params = np.expand_dims(params, axis=0)

        # TODO: these validations could be moved to ParameterSpace class
        # validate params
        provided_dim = params.shape[1]
        if provided_dim != len(self.param_space):
            message = (f'Dimensions of provided params ({params.shape[1]}) does not match expected '
                       f'dimension ({len(self.param_space)})')
            Logger.log(message, 'ERROR')

        # this raises warnings for out-of-bounds parameters
        for sample in params:
            self.param_space.validate(sample)

        # get values from the surface class
        y_preds = [[self._run(sample)] for sample in params]  # 2d array

        # if we are not asking for a ParamVector, we can just return y_preds
        if return_paramvector is False:
            return y_preds

        # return a ParameterVector
        # NOTE: while we do not allow batches or multiple objectives yet, this code is supposed to be able to support
        #  those
        target_names = ['target_0']
        wrapped = []  # list of ParamVectors with all samples and objectives
        for sample_values in y_preds:
            vector = ParameterVector()
            # iterate over all objectives/targets
            for target_name, target_value in zip(target_names, sample_values):
                vector.from_dict({target_name: target_value})
            wrapped.append(vector)

        return wrapped
Ejemplo n.º 26
0
def _validate_planner_kind(kind):
    """Check that `kind` identifies a usable planner.

    Args:
        kind (str, AbstractPlanner or type): either the name of a planner
            (it must be listed by `get_planners_list()` after conversion via
            `PlannerLoader.file_to_class`), an instance of `AbstractPlanner`,
            or a subclass of `AbstractPlanner`. Instances and subclasses
            must provide the callables `_set_param_space`, `_tell` and
            `_ask`.

    Every violation is logged with level FATAL.
    """
    # if we received a string
    if isinstance(kind, str):
        from . import PlannerLoader

        kind = PlannerLoader.file_to_class(kind)
        avail_planners = get_planners_list()
        if kind not in avail_planners:
            message = ('Planner "{0}" not available in Olympus. Please choose '
                       "from one of the available planners: {1}".format(
                           kind, ", ".join(avail_planners)))
            Logger.log(message, "FATAL")
        return

    # an instance of a planner class, or a custom planner class.
    # issubclass() raises TypeError for arguments that are not classes, so it
    # is guarded to let unsupported objects reach the error message below
    is_instance = isinstance(kind, AbstractPlanner)
    is_subclass = isinstance(kind, type) and issubclass(kind, AbstractPlanner)
    if is_instance or is_subclass:
        # make sure it has the necessary methods
        for method in ("_set_param_space", "_tell", "_ask"):
            implementation = getattr(kind, method, None)
            if not callable(implementation):
                message = f'The object {kind} does not implement the necessary method "{method}"'
                Logger.log(message, "FATAL")
        return

    # if we do not know what was passed raise an error
    message = 'Could not initialize Planner: the argument "kind" is neither a string or AbstractPlanner subclass'
    Logger.log(message, "FATAL")
Ejemplo n.º 27
0
 def _create_optimizer(self):
     """Draw the samples with `pyDOE.lhs` and rescale them to the parameter bounds.

     If `self.budget` is None, a warning is logged and the budget is set to
     the number of parameters. The samples are stored in `self.samples` as
     a list of rows and `self.has_optimizer` is set to True.
     """
     from pyDOE import lhs

     if self.budget is None:
         fallback = len(self.param_space)
         Logger.log(
             f'Please provide a number of samples for this planner. Given no number of samples provided, '
             f'falling back to setting `budget` to {fallback}',
             'WARNING')
         self.budget = len(self.param_space)

     self.samples = lhs(len(self.param_space), samples=self.budget)
     # map every column onto the [low, high] range of its parameter
     for column, param in enumerate(self.param_space):
         span = param.high - param.low
         self.samples[:, column] = span * self.samples[:, column] + param.low
     self.samples = list(self.samples)
     self.has_optimizer = True
Ejemplo n.º 28
0
def _validate_noise_kind(kind):
    """Check that `kind` identifies a usable noise object.

    Args:
        kind (str, AbstractNoise or type): either the name of a noise listed
            by `get_noises_list()`, an instance of `AbstractNoise`, or a
            subclass of `AbstractNoise`. Instances and subclasses must
            provide the callable `_add_noise`.

    Every violation is logged with level FATAL.
    """
    # if we received a string
    if isinstance(kind, str):
        avail_noises = get_noises_list()
        if kind not in avail_noises:
            message = ('Noise "{0}" not available in Olympus. Please choose '
                       'from one of the available noise objects: {1}'.format(kind, ', '.join(avail_noises)))
            Logger.log(message, 'FATAL')
        return

    # an instance of a noise class, or a custom noise class.
    # issubclass() raises TypeError for arguments that are not classes, so it
    # is guarded to let unsupported objects reach the error message below
    is_instance = isinstance(kind, AbstractNoise)
    is_subclass = isinstance(kind, type) and issubclass(kind, AbstractNoise)
    if is_instance or is_subclass:
        # make sure it has the necessary methods
        for method in ('_add_noise',):
            implementation = getattr(kind, method, None)
            if not callable(implementation):
                message = f'The object {kind} does not implement the necessary method "{method}"'
                Logger.log(message, 'FATAL')
        return

    # if we do not know what was passed raise an error
    message = 'Could not initialize Noise: the argument "kind" is neither a string or AbstractNoise subclass'
    Logger.log(message, 'FATAL')
Ejemplo n.º 29
0
 def list(self):
     """List the datasets found in the remote repository at `self.URL`.

     Runs ``svn ls -R`` on `self.URL`, keeps the first path component of
     every listed entry, removes duplicates and drops the first 8
     characters of each name (presumably the ``dataset_`` prefix).

     Returns:
         list: the sorted names; empty if the listing produced no output.
     """
     Logger.log("connecting to github", "INFO")
     try:
         # argument list and captured output: no shell involved and no
         # temporary file written to the current working directory
         listing = subprocess.run(
             ["svn", "ls", "-R", self.URL],
             stdout=subprocess.PIPE,
             universal_newlines=True,
         ).stdout
     except OSError:
         # `svn` could not be started; the shell-based version ended up
         # with an empty listing in that case as well
         listing = ""

     top_level = {
         line.split("/")[0]
         for line in listing.splitlines() if line.strip()
     }
     return sorted(name[8:] for name in top_level)
Ejemplo n.º 30
0
    def train(self, data):
        """Compute and store the statistics (mean, standard deviation, minimum and maximum per column) of the
        provided data, and mark the DataTransformer as trained.

        Args:
            data (array, Dataset): the data used to compute the statistics needed for the transformation. This can
                be a 2d numpy array, or a Dataset object. A Dataset is needed when the 'periodic' transformation
                is among `self.transformations`.
        """

        self._dims = None  # reset _dims if we retrain the DataTransformer

        if 'periodic' not in self.transformations:
            # allow passing a dataset
            if isinstance(data, Dataset):
                data = data.data.to_numpy()
            self._validate_data(data)
            # remember the input dimensions
            self._dims = np.shape(data)[1]
        else:
            # for splitting periodic variables we need a dataset, so that we can check which variables are periodic
            # and what their lower/upper bounds are
            if not isinstance(data, Dataset):
                Logger.log(
                    'in order to transform periodic variables you need to provide a Dataset object as the data argument',
                    'ERROR')

            # remember the input dimensions
            self._dims = np.shape(data.data)[1]

            # extract the info about periodic variables
            self._parse_dataset_for_periodic(data)

            # Now swap dataset for data after periodic transform. This is done just in case the periodic transform is
            # composed with other transformations that will then require operating on a higher dimensional array
            # the means, stddev etc. statistics will need to have matching dimensions
            raw_values = data.data.to_numpy()
            data = self._forward_periodic(raw_values)

        # ------------------------
        # Get stats about the data
        # ------------------------
        values = np.array(data)

        self._mean = np.mean(values, axis=0)
        self._stddev = np.std(values, axis=0)
        self._min = np.amin(values, axis=0)
        self._max = np.amax(values, axis=0)

        self.trained = True