def test_split_xz(self):
    x_dims = [(1, 10), (1, 10), (1, 10)]
    x = [1, 2, 3]
    z = ["red", "monkey"]
    xz = x + z
    x_split, z_split = split_xz(xz, x_dims)
    self.assertListEqual(x, x_split)
    self.assertListEqual(z, z_split)
def _z_dims(self, all_xz_unsearched, all_xz_searched):
    """
    Prepare dims to use in preprocessing for categorical dimensions.
    Gathers a list of possible dimensions from stored and current z
    vectors. Not actually used for creating a list of possible search
    points, only for helping to convert possible search points from
    categorical to integer/float.

    Args:
        all_xz_unsearched ([list]): The collection of xz points which have
            not been searched.
        all_xz_searched ([list]): The collection of xz points which have
            been searched.

    Returns:
        ([tuple]) dimensions for the z space
    """
    all_z_unsearched = [
        split_xz(xz, self.x_dims, z_only=True) for xz in all_xz_unsearched
    ]
    all_z_searched = [
        split_xz(xz, self.x_dims, z_only=True) for xz in all_xz_searched
    ]
    all_z = all_z_searched + all_z_unsearched

    if not all_z:
        return []

    dims = [(z, z) for z in all_z[0]]

    for i, dim in enumerate(dims):
        cat_values = []
        for z in all_z:
            if type(z[i]) in dtypes.others:
                # the dimension is categorical
                if z[i] not in cat_values:
                    cat_values.append(z[i])
                    dims[i] = cat_values
    return dims
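# Illustrative sketch of _z_dims output (hypothetical z values, not part of the
# module). After split_xz strips the x portion, numerical dims stay as
# (value, value) tuples taken from the first z vector, while each categorical
# dim is collapsed into the list of categories observed across all z vectors:
#
#   all_z = [[1.5, "red"], [2.0, "blue"], [1.5, "red"]]
#   result -> [(1.5, 1.5), ["red", "blue"]]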
def stash(self, x, y, z, all_xz_new, n_completed):
    """
    Write documents to database after optimization.

    Args:
        x (iterable): The current x guess.
        y (iterable): The current y (objective function) value
        z (iterable): The z vector associated with x
        all_xz_new ([list] or [tuple]): The predicted next best guess(es),
            including their associated z vectors
        n_completed (int): The number of completed guesses/workflows

    Returns:
        opt_id (pymongo InsertedOneResult): The result of the insertion
            of the new optimization document in the database. If multiple
            opt_ids are valid (ie batch mode is enabled), the last opt_id
            is returned.
    """
    for xz_new in all_xz_new:
        # separate 'predicted' z features from the new x vector
        x_new, z_new = split_xz(xz_new, self.x_dims)
        x_new = convert_native(x_new)
        z_new = convert_native(z_new)

        # if it is a duplicate (such as a forced
        # identical first guess)
        forced_dupe = self.c.find_one({"x": x})

        acqmap = {
            "ei": "Expected Improvement",
            "pi": "Probability of Improvement",
            "lcb": "Lower Confidence Boundary",
            None: "Highest Value",
            "maximin": "Maximin Expected Improvement",
        }
        if self.predictor in self.builtin_predictors:
            predictorstr = (
                self.predictor + " with acquisition: " + acqmap[self.acq]
            )
            if self.n_objs > 1:
                predictorstr += " using {} objectives".format(self.n_objs)
        else:
            predictorstr = self.predictor

        if forced_dupe:
            # only update the fields which should be updated
            self.c.find_one_and_update(
                {"x": x},
                {
                    "$set": {
                        "y": y,
                        "z": z,
                        "z_new": z_new,
                        "x_new": x_new,
                        "predictor": predictorstr,
                    }
                },
            )
        else:
            # update all the fields, as it is a new document
            self.c.insert_one(
                {
                    "z": z,
                    "y": y,
                    "x": x,
                    "z_new": z_new,
                    "x_new": x_new,
                    "predictor": predictorstr,
                    "index": n_completed + 1,
                }
            )

        # ensure previously finished workflow results are not overwritten by
        # concurrent predictions
        if (
            self.c.count_documents(
                {"x": x_new, "y": {"$exists": 1, "$ne": "reserved"}}
            )
            == 0
        ):
            # reserve the new x to prevent parallel processes from
            # registering it as unsearched, since the next iteration of this
            # process will be exploring it
            res = self.c.insert_one({"x": x_new, "y": "reserved"})
            opt_id = res.inserted_id
        else:
            raise ValueError(
                "The predictor suggested a guess which has already been "
                "tried: {}".format(x_new)
            )
    return opt_id
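# Sketch of the documents stash() writes (field names come from the inserts
# above; all example values below are hypothetical):
#
#   completed guess: {"x": [1, 2], "y": 0.7, "z": ["red"],
#                     "x_new": [3, 4], "z_new": ["blue"],
#                     "predictor": "<predictor> with acquisition: Expected Improvement",
#                     "index": 12}
#   reservation:     {"x": [3, 4], "y": "reserved"}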
def optimize(self, fw_spec, manager_id):
    """
    Run the optimization algorithm.

    Args:
        fw_spec (dict): The firework spec.
        manager_id (ObjectId): The MongoDB object id of the manager
            document.

    Returns:
        x (iterable): The current x guess.
        y (iterable): The current y (objective function) value
        z (iterable): The z vector associated with x
        all_xz_new ([list] or [tuple]): The predicted next best guess(es),
            including their associated z vectors
        n_completed (int): The number of completed guesses/workflows
    """
    x = list(fw_spec["_x"])
    y = fw_spec["_y"]
    if isinstance(y, (list, tuple)):
        if len(y) == 1:
            y = y[0]
        self.n_objs = len(y)
        if self.acq not in ("maximin", None):
            raise ValueError(
                "{} is not a valid acquisition function for multiobjective "
                "optimization".format(self.acq)
            )
    else:
        if self.acq == "maximin":
            raise ValueError(
                "Maximin is not a valid acquisition function for single "
                "objective optimization."
            )
        self.n_objs = 1

    # If process A suggests a certain guess and runs it, process B may
    # suggest the same guess while process A is running its new workflow.
    # Therefore, process A must reserve the guess. Line below releases
    # reservation on this document in case of workflow failure or end of
    # workflow.
    self.c.delete_one({"x": x, "y": "reserved"})

    # fetch additional attributes for constructing ML model
    z = self.get_z(x, *self.get_z_args, **self.get_z_kwargs)

    # use all possible training points as default
    n_completed = self.c.count_documents(self._completed)
    if not self.n_train_pts or self.n_train_pts > n_completed:
        self.n_train_pts = n_completed

    # check if optimization should be done, if in batch mode
    batch_mode = False if self.batch_size == 1 else True
    batch_ready = (
        n_completed not in (0, 1)
        and (n_completed + 1) % self.batch_size == 0
    )

    x = convert_native(x)
    y = convert_native(y)
    z = convert_native(z)

    if batch_mode and not batch_ready:
        # 'None' predictor means this job was not used for
        # an optimization run.
        if self.c.find_one({"x": x}):
            if self.c.find_one({"x": x, "y": "reserved"}):
                # For reserved guesses: update everything
                self.c.find_one_and_update(
                    {"x": x, "y": "reserved"},
                    {
                        "$set": {
                            "y": y,
                            "z": z,
                            "z_new": [],
                            "x_new": [],
                            "predictor": None,
                            "index": n_completed + 1,
                        }
                    },
                )
            else:
                # For completed guesses (ie, this workflow
                # is a forced duplicate), do not update
                # index, but update everything else
                self.c.find_one_and_update(
                    {"x": x},
                    {
                        "$set": {
                            "y": y,
                            "z": z,
                            "z_new": [],
                            "x_new": [],
                            "predictor": None,
                        }
                    },
                )
        else:
            # For new guesses: insert x, y, z, index,
            # predictor, and dummy new guesses
            self.c.insert_one(
                {
                    "x": x,
                    "y": y,
                    "z": z,
                    "x_new": [],
                    "z_new": [],
                    "predictor": None,
                    "index": n_completed + 1,
                }
            )
        self.pop_lock(manager_id)
        raise BatchNotReadyError

    # Mongo aggregation framework may give duplicate documents, so we cannot
    # use $sample to randomize the training points used
    searched_indices = random.sample(
        range(1, n_completed + 1), self.n_train_pts
    )
    searched_docs = self.c.find(
        {"index": {"$in": searched_indices}}, batch_size=10000
    )
    reserved_docs = self.c.find({"y": "reserved"}, batch_size=10000)
    reserved = []
    for doc in reserved_docs:
        reserved.append(doc["x"])

    all_y = [None] * n_completed
    all_y.append(y)
    all_x_searched = [None] * n_completed
    all_x_searched.append(x)
    z = list(z)
    all_xz_searched = [None] * n_completed
    all_xz_searched.append(x + z)
    for i, doc in enumerate(searched_docs):
        all_x_searched[i] = doc["x"]
        all_xz_searched[i] = doc["x"] + doc["z"]
        all_y[i] = doc["y"]

    all_x_space = self._discretize_space(self.x_dims)
    all_x_space = list(all_x_space) if self.z_file else all_x_space
    all_x_unsearched = []
    for xi in all_x_space:
        xj = list(xi)
        if xj not in all_x_searched and xj not in reserved:
            all_x_unsearched.append(xj)
            if len(all_x_unsearched) == self.n_search_pts:
                break

    if self.z_file:
        if path.exists(self.z_file):
            with open(self.z_file, "rb") as f:
                xz_map = pickle.load(f)
        else:
            xz_map = {
                tuple(xi): self.get_z(
                    xi, *self.get_z_args, **self.get_z_kwargs
                )
                for xi in all_x_space
            }
            with open(self.z_file, "wb") as f:
                pickle.dump(xz_map, f)
        all_xz_unsearched = [
            xi + xz_map[tuple(xi)] for xi in all_x_unsearched
        ]
    else:
        all_xz_unsearched = [
            xi + self.get_z(xi, *self.get_z_args, **self.get_z_kwargs)
            for xi in all_x_unsearched
        ]

    # if there are no more unsearched points in the entire
    # space, either they have been searched (ie have x, y,
    # and z) or have been reserved.
    if len(all_xz_unsearched) < 1:
        if self.is_discrete_all:
            raise ExhaustedSpaceError(
                "The discrete space has been searched exhaustively."
            )
        else:
            raise TypeError(
                "A comprehensive list of points was exhausted "
                "but the dimensions are not discrete."
            )

    z_dims = self._z_dims(all_xz_unsearched, all_xz_searched)
    xz_dims = self.x_dims + z_dims

    # run machine learner on Z or X features
    if self.predictor in self.builtin_predictors:
        model = self.builtin_predictors[self.predictor]
        all_xz_searched = self._encode(all_xz_searched, xz_dims)
        all_xz_unsearched = self._encode(all_xz_unsearched, xz_dims)
        all_xz_new_onehot = []
        for _ in range(self.batch_size):
            xz1h = self._predict(
                all_xz_searched,
                all_y,
                all_xz_unsearched,
                model(*self.predictor_args, **self.predictor_kwargs),
                self.maximize,
                scaling=True,
            )
            ix = all_xz_unsearched.index(xz1h)
            all_xz_unsearched.pop(ix)
            all_xz_new_onehot.append(xz1h)
        all_xz_new = [
            self._decode(xz_onehot, xz_dims)
            for xz_onehot in all_xz_new_onehot
        ]
    elif self.predictor == "random":
        all_xz_new = random.sample(all_xz_unsearched, self.batch_size)
    else:
        # If using a custom predictor, automatically convert
        # categorical info to one-hot encoded ints.
        # Used when a custom predictor cannot natively use
        # categorical info
        if self.onehot_categorical:
            all_xz_searched = self._encode(all_xz_searched, xz_dims)
            all_xz_unsearched = self._encode(all_xz_unsearched, xz_dims)

        try:
            predictor_fun = deserialize(self.predictor)
        except Exception as E:
            raise NameError(
                "The custom predictor {} didn't import "
                "correctly!\n{}".format(self.predictor, E)
            )

        all_xz_new = predictor_fun(
            all_xz_searched,
            all_y,
            self.x_dims,
            all_xz_unsearched,
            *self.predictor_args,
            **self.predictor_kwargs,
        )
        if self.onehot_categorical:
            all_xz_new = self._decode(all_xz_new, xz_dims)
        if not isinstance(all_xz_new[0], (list, tuple)):
            all_xz_new = [all_xz_new]

    # duplicate checking for custom optimizer functions
    if self.duplicate_check:
        if not self.enforce_sequential:
            raise ValueError(
                "Duplicate checking cannot work when "
                "optimizations are not enforced sequentially."
            )
        if (
            self.predictor not in self.builtin_predictors
            and self.predictor != "random"
        ):
            all_x_new = [
                split_xz(xz_new, self.x_dims, x_only=True)
                for xz_new in all_xz_new
            ]
            all_x_searched = [
                split_xz(xz, self.x_dims, x_only=True)
                for xz in all_xz_searched
            ]
            if self.tolerances:
                for n, x_new in enumerate(all_x_new):
                    if is_duplicate_by_tolerance(
                        x_new, all_x_searched, tolerances=self.tolerances
                    ):
                        all_xz_new[n] = random.choice(all_xz_unsearched)
            else:
                if self.is_discrete_all:
                    # test only for x, not xz because custom predicted z
                    # may not be accounted for
                    for n, x_new in enumerate(all_x_new):
                        if x_new in all_x_searched or x_new == x:
                            all_xz_new[n] = random.choice(all_xz_unsearched)
                else:
                    raise ValueError(
                        "Define tolerances parameter to "
                        "duplicate check floats."
                    )
    return x, y, z, all_xz_new, n_completed
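# Minimal sketch of a custom predictor matching the call made in optimize()
# above: predictor_fun(all_xz_searched, all_y, x_dims, all_xz_unsearched, ...).
# The function name and selection rule here are hypothetical; only the argument
# order and the contract of returning one (or more) xz vectors drawn from
# all_xz_unsearched are implied by the surrounding code.
def example_random_walk_predictor(all_xz_searched, all_y, x_dims,
                                  all_xz_unsearched):
    # Ignore the training data and simply propose a random unsearched point.
    # A real predictor would fit a model to (all_xz_searched, all_y) first.
    import random
    return random.choice(all_xz_unsearched)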
def run_task(self, fw_spec):
    """
    FireTask for running an optimization loop.

    Args:
        fw_spec (dict): the firetask spec. Must contain a '_y' key with
            a float type field and must contain a '_x' key containing a
            vector uniquely defining the point in search space.

    Returns:
        (FWAction) A workflow based on the workflow creator and a new,
            optimized guess.
    """
    pid = f"{getpid()}@{gethostname()}"
    sleeptime = 0.01
    max_runs = int(self.timeout / sleeptime)
    max_resets = 3

    # Running stepwise optimization for concurrent processes requires a
    # manual 'lock' on the optimization database to prevent duplicate
    # guesses. The first process sets up a manager document which handles
    # locking and queueing processes by PID. The single, active process in
    # the lock is free to access optimization data; the queue of the manager
    # holds parallel process PIDs waiting to access the db. When the active
    # process finishes, it removes itself from the lock and moves the first
    # queue PID into the lock, allowing the next process to begin
    # optimization. Each process continually tries to either queue or place
    # itself into the lock if not active.
    for run in range(max_resets * max_runs):
        manager_count = self.c.count_documents(self._manager)
        if manager_count == 0:
            self.c.insert_one(
                {"lock": pid, "queue": [], "doctype": "manager"}
            )
        elif manager_count == 1:
            # avoid bootup problems if manager lock is being deleted
            # concurrently with this check
            try:
                manager = self.c.find_one(self._manager)
                manager_id = manager["_id"]
                lock = manager["lock"]
            except TypeError:
                continue

            if lock is None:
                self.c.find_one_and_update(
                    {"_id": manager_id}, {"$set": {"lock": pid}}
                )
            elif self.enforce_sequential and lock != pid:
                if pid not in manager["queue"]:
                    # avoid bootup problems if manager queue is being
                    # deleted concurrently with this check
                    try:
                        self.c.find_one_and_update(
                            {"_id": manager_id}, {"$push": {"queue": pid}}
                        )
                    except TypeError:
                        continue
                else:
                    sleep(sleeptime)
            elif not self.enforce_sequential or (
                self.enforce_sequential and lock == pid
            ):
                try:
                    x, y, z, all_xz_new, n_completed = self.optimize(
                        fw_spec, manager_id
                    )
                except BatchNotReadyError:
                    return None
                except Exception:
                    self.pop_lock(manager_id)
                    raise

                # make sure a process has not timed out and changed the lock
                # pid while this process is computing the next guess
                try:
                    if (
                        self.c.find_one(self._manager)["lock"] != pid
                        or self.c.count_documents(self._manager) == 0
                    ):
                        continue
                    else:
                        opt_id = self.stash(x, y, z, all_xz_new, n_completed)
                except TypeError as E:
                    warnings.warn(
                        "Process {} probably timed out while "
                        "computing next guess, with exception {}."
                        " Try shortening the training time or "
                        "lengthening the timeout for OptTask!"
                        "".format(pid, E),
                        RuntimeWarning,
                    )
                    raise E
                    # continue
                self.pop_lock(manager_id)

                all_x_new = [
                    split_xz(xz_new, self.x_dims, x_only=True)
                    for xz_new in all_xz_new
                ]

                if not isinstance(self.wf_creator_args, (list, tuple)):
                    raise TypeError(
                        "wf_creator_args should be a list/tuple of "
                        "positional arguments."
                    )
                if not isinstance(self.wf_creator_kwargs, dict):
                    raise TypeError(
                        "wf_creator_kwargs should be a dictionary of "
                        "keyword arguments."
                    )

                new_wfs = [
                    self.wf_creator(
                        x_new, *self.wf_creator_args, **self.wf_creator_kwargs
                    )
                    for x_new in all_x_new
                ]
                for wf in new_wfs:
                    self.lpad.add_wf(wf)
                return FWAction(
                    update_spec={"_optimization_id": opt_id},
                    stored_data={"_optimization_id": opt_id},
                )
        else:
            # Delete the manager that this has created
            self.c.delete_one({"lock": pid})

        if run in [max_runs * k for k in range(1, max_resets)]:
            self.c.find_one_and_update(
                self._manager, {"$set": {"lock": None, "queue": []}}
            )
        elif run == max_runs * max_resets:
            raise Exception(
                "The manager is still stuck after "
                "resetting. Make sure no stalled processes "
                "are in the queue."
            )