Example #1
    def stash(self, x, y, z, all_xz_new, n_completed):
        """
        Write documents to database after optimization.

        Args:
            x (iterable): The current x guess.
            y (iterable): The current y (objective function) value.
            z (iterable): The z vector associated with x
            all_xz_new ([list] or [tuple]): The predicted next best guess(es),
                including their associated z vectors
            n_completed (int): The number of completed guesses/workflows

        Returns:
            opt_id (ObjectId): The id of the optimization document inserted
                into the database. If multiple opt_ids are valid (i.e., batch
                mode is enabled), the last opt_id is returned.
        """

        for xz_new in all_xz_new:
            # separate 'predicted' z features from the new x vector
            x_new, z_new = split_xz(xz_new, self.x_dims)
            x_new = convert_native(x_new)
            z_new = convert_native(z_new)

            # check whether this x is a duplicate (such as a forced
            # identical first guess)
            forced_dupe = self.c.find_one({"x": x})

            acqmap = {
                "ei": "Expected Improvement",
                "pi": "Probability of Improvement",
                "lcb": "Lower Confidence Boundary",
                None: "Highest Value",
                "maximin": "Maximin Expected "
                "Improvement",
            }
            if self.predictor in self.builtin_predictors:
                predictorstr = (self.predictor + " with acquisition: " +
                                acqmap[self.acq])
                if self.n_objs > 1:
                    predictorstr += " using {} objectives".format(self.n_objs)
            else:
                predictorstr = self.predictor
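            # e.g., for a built-in predictor this might read something like
            # "RandomForestRegressor with acquisition: Expected Improvement"
            # (the predictor name shown is illustrative)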
            if forced_dupe:
                # only update the fields which should be updated
                self.c.find_one_and_update(
                    {"x": x},
                    {
                        "$set": {
                            "y": y,
                            "z": z,
                            "z_new": z_new,
                            "x_new": x_new,
                            "predictor": predictorstr,
                        }
                    },
                )
            else:
                # update all the fields, as it is a new document
                self.c.insert_one({
                    "z": z,
                    "y": y,
                    "x": x,
                    "z_new": z_new,
                    "x_new": x_new,
                    "predictor": predictorstr,
                    "index": n_completed + 1,
                })
            # ensure previously finished workflow results are not overwritten
            # by concurrent predictions
            if self.c.count_documents(
                    {"x": x_new, "y": {"$exists": 1, "$ne": "reserved"}}) == 0:
                # reserve the new x to prevent parallel processes from
                # registering it as unsearched, since the next iteration of this
                # process will be exploring it
                res = self.c.insert_one({"x": x_new, "y": "reserved"})
                opt_id = res.inserted_id
            else:
                raise ValueError(
                    "The predictor suggested a guess which has already been "
                    "tried: {}".format(x_new))
        return opt_id
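
The stash example above leans on two small helpers. The sketches below are assumptions reconstructed from how the helpers are used (split a combined vector by the number of x dimensions; make values BSON-serializable), not the library's actual implementations:

    import numpy as np

    def split_xz(xz, x_dims, x_only=False):
        """Split a combined [x + z] vector back into x and z, using the
        number of x dimensions to find the boundary."""
        x = list(xz[:len(x_dims)])
        if x_only:
            return x
        return x, list(xz[len(x_dims):])

    def convert_native(val):
        """Recursively convert numpy scalar types to native Python types so
        documents can be serialized to BSON by pymongo."""
        if isinstance(val, np.generic):
            return val.item()
        if isinstance(val, (list, tuple)):
            return [convert_native(v) for v in val]
        return val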
Example #2
    def optimize(self, fw_spec, manager_id):
        """
        Run the optimization algorithm.

        Args:
            fw_spec (dict): The firework spec.
            manager_id (ObjectId): The MongoDB object id of the manager
                document.

        Returns:
            x (iterable): The current x guess.
            y (iterable): The current y (objective function) value.
            z (iterable): The z vector associated with x
            all_xz_new ([list] or [tuple]): The predicted next best guess(es),
                including their associated z vectors
            n_completed (int): The number of completed guesses/workflows
        """
        x = list(fw_spec["_x"])
        y = fw_spec["_y"]
        # a one-element sequence is a single objective; unwrap it before
        # counting objectives, otherwise len() below would fail on the scalar
        if isinstance(y, (list, tuple)) and len(y) == 1:
            y = y[0]
        if isinstance(y, (list, tuple)):
            self.n_objs = len(y)
            if self.acq not in ("maximin", None):
                raise ValueError(
                    "{} is not a valid acquisition function for multiobjective "
                    "optimization".format(self.acq))
        else:
            if self.acq == "maximin":
                raise ValueError(
                    "Maximin is not a valid acquisition function for single "
                    "objective optimization.")
            self.n_objs = 1

        # If process A suggests a certain guess and runs it, process B may
        # suggest the same guess while process A is running its new workflow.
        # Therefore, process A must reserve the guess. Line below releases
        # reservation on this document in case of workflow failure or end of
        # workflow.
        self.c.delete_one({"x": x, "y": "reserved"})

        # fetch additional attributes for constructing ML model
        z = self.get_z(x, *self.get_z_args, **self.get_z_kwargs)

        # use all possible training points as default
        n_completed = self.c.count_documents(self._completed)
        if not self.n_train_pts or self.n_train_pts > n_completed:
            self.n_train_pts = n_completed

        # check whether optimization should be done, if in batch mode
        batch_mode = self.batch_size != 1
        batch_ready = (n_completed not in (0, 1)
                       and (n_completed + 1) % self.batch_size == 0)
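        # e.g., with batch_size=5 the batch is ready when this evaluation is
        # the 5th, 10th, ... to complete, i.e. n_completed in (4, 9, 14, ...)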

        x = convert_native(x)
        y = convert_native(y)
        z = convert_native(z)

        if batch_mode and not batch_ready:
            # 'None' predictor means this job was not used for
            # an optimization run.
            if self.c.find_one({"x": x}):
                if self.c.find_one({"x": x, "y": "reserved"}):
                    # For reserved guesses: update everything
                    self.c.find_one_and_update(
                        {
                            "x": x,
                            "y": "reserved"
                        },
                        {
                            "$set": {
                                "y": y,
                                "z": z,
                                "z_new": [],
                                "x_new": [],
                                "predictor": None,
                                "index": n_completed + 1,
                            }
                        },
                    )
                else:
                    # For completed guesses (ie, this workflow
                    # is a forced duplicate), do not update
                    # index, but update everything else
                    self.c.find_one_and_update(
                        {"x": x},
                        {
                            "$set": {
                                "y": y,
                                "z": z,
                                "z_new": [],
                                "x_new": [],
                                "predictor": None,
                            }
                        },
                    )
            else:
                # For new guesses: insert x, y, z, index,
                # predictor, and dummy new guesses
                self.c.insert_one({
                    "x": x,
                    "y": y,
                    "z": z,
                    "x_new": [],
                    "z_new": [],
                    "predictor": None,
                    "index": n_completed + 1,
                })
            self.pop_lock(manager_id)
            raise BatchNotReadyError

        # Mongo aggregation framework may give duplicate documents, so we cannot
        # use $sample to randomize the training points used
        searched_indices = random.sample(range(1, n_completed + 1),
                                         self.n_train_pts)
        searched_docs = self.c.find({"index": {"$in": searched_indices}},
                                    batch_size=10000)
        reserved_docs = self.c.find({"y": "reserved"}, batch_size=10000)
        reserved = [doc["x"] for doc in reserved_docs]
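        # Pre-size the training arrays to n_completed + 1 slots: indices
        # 0..n_train_pts-1 are filled from the sampled searched documents
        # below, and the final slot holds the current (x, y, z) point.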
        all_y = [None] * n_completed
        all_y.append(y)
        all_x_searched = [None] * n_completed
        all_x_searched.append(x)
        z = list(z)
        all_xz_searched = [None] * n_completed
        all_xz_searched.append(x + z)
        for i, doc in enumerate(searched_docs):
            all_x_searched[i] = doc["x"]
            all_xz_searched[i] = doc["x"] + doc["z"]
            all_y[i] = doc["y"]

        all_x_space = self._discretize_space(self.x_dims)
        all_x_space = list(all_x_space) if self.z_file else all_x_space
        all_x_unsearched = []
        for xi in all_x_space:
            xj = list(xi)
            if xj not in all_x_searched and xj not in reserved:
                all_x_unsearched.append(xj)
                if len(all_x_unsearched) == self.n_search_pts:
                    break

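        # Optionally cache the x -> z mapping on disk: recomputing get_z for
        # every point in the space can be expensive, so the full map is
        # pickled once and reloaded on subsequent runs.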
        if self.z_file:
            if path.exists(self.z_file):
                with open(self.z_file, "rb") as f:
                    xz_map = pickle.load(f)
            else:
                xz_map = {
                    tuple(xi): self.get_z(xi, *self.get_z_args,
                                          **self.get_z_kwargs)
                    for xi in all_x_space
                }
                with open(self.z_file, "wb") as f:
                    pickle.dump(xz_map, f)

            all_xz_unsearched = [
                xi + xz_map[tuple(xi)] for xi in all_x_unsearched
            ]
        else:
            all_xz_unsearched = [
                xi + self.get_z(xi, *self.get_z_args, **self.get_z_kwargs)
                for xi in all_x_unsearched
            ]

        # if there are no unsearched points left in the entire space, every
        # point has either been searched (i.e., has x, y, and z) or been
        # reserved.
        if not all_xz_unsearched:
            if self.is_discrete_all:
                raise ExhaustedSpaceError(
                    "The discrete space has been searched"
                    " exhaustively.")
            else:
                raise TypeError("A comprehensive list of points was exhausted "
                                "but the dimensions are not discrete.")
        z_dims = self._z_dims(all_xz_unsearched, all_xz_searched)
        xz_dims = self.x_dims + z_dims

        # run machine learner on Z or X features
        if self.predictor in self.builtin_predictors:
            model = self.builtin_predictors[self.predictor]
            all_xz_searched = self._encode(all_xz_searched, xz_dims)
            all_xz_unsearched = self._encode(all_xz_unsearched, xz_dims)
            all_xz_new_onehot = []
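            # batch selection is greedy: each chosen candidate is popped from
            # the unsearched pool so the next prediction cannot repeat it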
            for _ in range(self.batch_size):
                xz1h = self._predict(
                    all_xz_searched,
                    all_y,
                    all_xz_unsearched,
                    model(*self.predictor_args, **self.predictor_kwargs),
                    self.maximize,
                    scaling=True,
                )
                ix = all_xz_unsearched.index(xz1h)
                all_xz_unsearched.pop(ix)
                all_xz_new_onehot.append(xz1h)
            all_xz_new = [
                self._decode(xz_onehot, xz_dims)
                for xz_onehot in all_xz_new_onehot
            ]

        elif self.predictor == "random":
            all_xz_new = random.sample(all_xz_unsearched, self.batch_size)

        else:
            # If using a custom predictor, automatically convert
            # categorical info to one-hot encoded ints.
            # Used when a custom predictor cannot natively use
            # categorical info
            if self.onehot_categorical:
                all_xz_searched = self._encode(all_xz_searched, xz_dims)
                all_xz_unsearched = self._encode(all_xz_unsearched, xz_dims)

            try:
                predictor_fun = deserialize(self.predictor)
            except Exception as E:
                raise NameError("The custom predictor {} didn't import "
                                "correctly!\n{}".format(self.predictor, E))

            all_xz_new = predictor_fun(
                all_xz_searched,
                all_y,
                self.x_dims,
                all_xz_unsearched,
                *self.predictor_args,
                **self.predictor_kwargs,
            )
            if self.onehot_categorical:
                all_xz_new = self._decode(all_xz_new, xz_dims)

            if not isinstance(all_xz_new[0], (list, tuple)):
                all_xz_new = [all_xz_new]

        # duplicate checking for custom optimizer functions
        if self.duplicate_check:

            if not self.enforce_sequential:
                raise ValueError(
                    "Duplicate checking cannot work when "
                    "optimizations are not enforced sequentially.")
            if (self.predictor not in self.builtin_predictors
                    and self.predictor != "random"):
                all_x_new = [
                    split_xz(xz_new, self.x_dims, x_only=True)
                    for xz_new in all_xz_new
                ]
                all_x_searched = [
                    split_xz(xz, self.x_dims, x_only=True)
                    for xz in all_xz_searched
                ]
                if self.tolerances:
                    for n, x_new in enumerate(all_x_new):
                        if is_duplicate_by_tolerance(
                                x_new, all_x_searched,
                                tolerances=self.tolerances):
                            all_xz_new[n] = random.choice(all_xz_unsearched)
                else:
                    if self.is_discrete_all:
                        # test only for x, not xz because custom predicted z
                        # may not be accounted for
                        for n, x_new in enumerate(all_x_new):
                            if x_new in all_x_searched or x_new == x:
                                all_xz_new[n] = random.choice(
                                    all_xz_unsearched)
                    else:
                        raise ValueError("Define tolerances parameter to "
                                         "duplicate check floats.")
        return x, y, z, all_xz_new, n_completed
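
The custom-predictor branch above calls predictor_fun(all_xz_searched, all_y, x_dims, all_xz_unsearched, ...), so any user function with that signature can drive the search. Below is a minimal sketch of such a predictor, assuming a single numeric objective being minimized and all-numeric features (the function name is hypothetical):

    def nearest_to_best_predictor(XZ_searched, Y, x_dims, XZ_unsearched):
        """Return the unsearched candidate closest (squared Euclidean
        distance over raw features) to the best searched point. Assumes
        every entry of Y is a numeric scalar and features are numeric."""
        best_xz = XZ_searched[Y.index(min(Y))]
        return min(
            XZ_unsearched,
            key=lambda xz: sum((a - b) ** 2 for a, b in zip(xz, best_xz)),
        )

Because this returns a single flat vector rather than a list of vectors, optimize() wraps it into a one-element batch via the isinstance check near the end of the custom-predictor branch.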
Example #3
    def test_convert_native(self):
        # np.int, np.float, and np.str were aliases of the builtins and were
        # removed in NumPy 1.24+; use concrete numpy scalar types, which
        # convert_native is meant to convert back to native Python types
        a = [np.int64(10), np.float64(12.2), np.str_("a str"), 12.3, 100, "ok"]
        native = convert_native(a)
        self.assertListEqual(
            [type(i) for i in native], [int, float, str, float, int, str]
        )