def getGurobiEnvironment(config, retries=10):
    """ Create a new license environment

    Input:
        config: config file.

    Output:
        environment object

    Notes:
        if config["ENVIRONMENT"] is "GAM" it uses the free license.
    """
    clogging.setup(syslog=True)
    logging.info("Creating environment...")
    os.environ[GRB_LICENSE_FILE] = os.path.expandvars(config[GUROBI][GUROBI_LIC])
    cluster = config.get(ENVIRONMENT, CLUSTER_OPTION, fallback=CENSUS_CLUSTER)
    env = None
    rand_wait_base = np.random.uniform(1, 3)
    attempts = 0
    success = False
    while (not success) and attempts < retries:
        try:
            if cluster == GAM_CLUSTER:
                env = gb.Env()
            else:
                logfile = os.path.expandvars(config[GUROBI][GUROBI_LOGFILE_NAME])
                env1 = config[ENVIRONMENT][GRB_ISV_NAME]
                env2 = config[ENVIRONMENT][GRB_APP_NAME]
                env3 = int(config[ENVIRONMENT][GRB_ENV3])
                env4 = config[ENVIRONMENT][GRB_ENV4].strip()
                env = gb.Env.OtherEnv(logfile, env1, env2, env3, env4)
            success = True
        except gb.GurobiError as err:
            attempts += 1
            if attempts == retries:
                raise err
            # Exponential backoff: the randomized base wait (in milliseconds) grows by 1.3x per failed attempt
            rand_wait = 1.3 ** (attempts - 1) * rand_wait_base
            time.sleep(rand_wait / 1000)
    if cluster == GAM_CLUSTER:
        logging.debug("gurobi environment creation succeeded on attempt %s", attempts)
    else:
        logging.debug("Successfully connected to Gurobi token server on attempt %s", attempts)
    return env
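
# Illustrative sketch (not part of the pipeline): the retry loop above sleeps
# rand_wait_base * 1.3**(attempt-1) milliseconds between failed attempts, where
# rand_wait_base ~ U(1, 3). The hypothetical helper below just reproduces that wait
# schedule so it can be inspected on its own; it assumes only numpy (np), as already
# imported by this module.
def _example_backoff_schedule_ms(retries=10):
    """Return the waits, in milliseconds, that getGurobiEnvironment would sleep between attempts."""
    rand_wait_base = np.random.uniform(1, 3)
    return [1.3 ** (attempt - 1) * rand_wait_base for attempt in range(1, retries)]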
def geoimp_wrapper_root(*, config, parent_shape, root_node: GeounitNode, optimizers, min_schema=None, keep_debug_info=False):
    """
    This function performs the Post-Processing Step of the root geonode (e.g. US or a state) to the root geonode level.
    It is called from engine_utils.py:topdown in a Spark map operation

    Inputs:
        config: configuration object
        parent_shape: tuple of shapes of the parent histograms
        root_node: a GeounitNode object referring to the top/root node of the geographical tree
            (e.g. US, US+PR or a single state for state-size runs)
        optimizers: which L2, Rounder and SequentialOptimizer to use
        min_schema: backup feasibility schema for the optimizer
        keep_debug_info: keep dp_queries and syn_unrounded in the optimized node; delete if False

    Output:
        root_node: a GeounitNode object referring to the top/root node of the geographical tree
            (e.g. US, US+PR or a single state for state-size runs)
    """
    # Make sure that the logger is set up on all of the nodes
    clogging.setup(level=logging.INFO)
    # t_start = time.time()
    parent_hist = [None] * len(parent_shape)
    children = [root_node.unzipNoisy()]

    children, backup_solve_status = makeInputsAndRunOptimizer(children, config, min_schema, parent_hist, parent_shape,
                                                              "root_to_root", optimizers, keep_debug_info=keep_debug_info)
    return children[0]
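
# Usage sketch (hypothetical): per the docstring, geoimp_wrapper_root is invoked from a
# Spark map operation in engine_utils.py:topdown. Under that assumption the call site
# would look roughly like the commented lines below; `root_rdd`, `config`, `parent_shape`
# and `optimizers` are stand-in names supplied by the engine, not defined here.
#
#   optimized_root_rdd = root_rdd.map(
#       lambda node: geoimp_wrapper_root(config=config, parent_shape=parent_shape,
#                                        root_node=node, optimizers=optimizers))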
def geoimp_wrapper(*, config, parent_child_node, accum, min_schema=None):
    """
    This function performs the Post-Processing Step for a generic parent to the child geography.
    It is called from topdown_engine.py:topdown in a Spark map operation.
    It runs on the CORE and TASK nodes, not on the MASTER, so there is no das object!

    Inputs:
        config: configuration object
        parent_child_node: a (k,v) RDD with key being a geocode and value being a tuple of GeounitNode objects
            containing one parent and multiple children
        accum: spark accumulator object which tracks the number of solves that use the backup solve

    Output:
        children: a list of Node objects for each of the children, after post-processing
    """
    # Make sure that the logger is set up on all the nodes
    clogging.setup(level=logging.INFO, syslog=True,
                   syslog_address=(das_utils.getMasterIp(), C.SYSLOG_UDP))

    parent: GeounitNode
    children: List[GeounitNode]
    parent, children = findParentChildNodes(parent_child_node)

    n_children = len(children)

    #######
    # under certain circumstances we can skip the gurobi optimization
    #######

    # Only 1 child
    if n_children == 1:
        children[0].syn = parent.syn
        return children

    # If the parent is empty, all children are empty too
    if parent.syn.sum() == 0:
        for child in children:
            child.syn = sparse.multiSparse(np.zeros(parent.syn.shape))
        return children

    #########
    # resume code for gurobi optimization
    ########

    # stack the dp arrays on top of one another, if only 1 child just expand the axis
    if parent.dp:
        if n_children > 1:
            noisy_child = np.stack([asDense(child.dp.DPanswer) for child in children], axis=-1)
        else:
            noisy_child = np.expand_dims(asDense(children[0].dp.DPanswer),
                                         axis=len(children[0].dp.DPanswer.shape))
    else:
        noisy_child = None

    noisy_child_weight = 1. / children[0].dp.Var if parent.dp else None

    # TODO: Maybe filtering out the detailed query from node.dp_queries can be done more neatly
    dp_queries_comb = stackNodeProperties(children, lambda node: node.dp_queries,
                                          cons_dpq.StackedDPquery, lambda name: name != C.DETAILED)
    query_weights = map(lambda sdpq: 1. / sdpq.Var, dp_queries_comb)  # We can get actual variance for each query if we want
    constraints_comb = stackNodeProperties(children, lambda node: node.cons, cons_dpq.StackedConstraint)

    parent_hist = parent.getDenseSyn()
    parent_geocode = parent.geocode

    seq_opt = sequential_optimizers.L2PlusRounderWithBackup(
        das=None, config=config, parent=parent_hist, parent_shape=parent_hist.shape,
        NoisyChild=noisy_child, childGeoLen=n_children, DPqueries=dp_queries_comb,
        constraints=constraints_comb, NoisyChild_weight=noisy_child_weight,
        query_weights=query_weights, identifier=parent_geocode,
        min_schema=min_schema, stat_node=children[0])

    l2_answer, int_answer, backup_solve_status = seq_opt.run()

    # slice off the combined child solution to make separate arrays for each child
    int_answer_list = np_utils.sliceArray(int_answer)
    l2_answer_list = np_utils.sliceArray(l2_answer)

    # check constraints
    for i, child in enumerate(children):
        child.syn = int_answer_list[i]
        constraintsCheck(child)

    # make sparse arrays
    for i, child in enumerate(children):
        child.syn = sparse.multiSparse(int_answer_list[i])
        child.syn_unrounded = sparse.multiSparse(l2_answer_list[i])

    if backup_solve_status is True:
        accum += 1

    return children
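
# Minimal numpy sketch (illustrative only): geoimp_wrapper stacks the children's noisy
# histograms along a new trailing axis, solves for all children jointly, and then slices
# the combined answer back into one array per child. np_utils.sliceArray is assumed to
# behave like the trailing-axis split shown here; the demo function name is hypothetical.
def _example_stack_and_slice():
    child_hists = [np.zeros((2, 3)), np.ones((2, 3))]             # two children, same shape
    stacked = np.stack(child_hists, axis=-1)                      # shape (2, 3, 2)
    sliced = [stacked[..., i] for i in range(stacked.shape[-1])]  # back to two (2, 3) arrays
    assert all((orig == back).all() for orig, back in zip(child_hists, sliced))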
def geoimp_wrapper_nat(*, config, parent_shape, nat_node: GeounitNode, min_schema=None):
    """
    This function performs the Post-Processing Step of National to National level.
    It is called from engine_utils.py:topdown in a Spark map operation

    Inputs:
        config: configuration object
        nat_node: a GeounitNode object referring to the entire nation

    Output:
        nat_node: a GeounitNode object referring to the entire nation
    """
    # Make sure that the logger is set up on all of the nodes
    clogging.setup(level=logging.INFO, syslog=True,
                   syslog_address=(das_utils.getMasterIp(), C.SYSLOG_UDP))
    # t_start = time.time()
    parent_hist = None

    noisy_child = np.expand_dims(asDense(nat_node.dp.DPanswer),
                                 axis=len(nat_node.dp.DPanswer.shape)) if nat_node.dp else None
    noisy_child_weight = 1. / nat_node.dp.Var if nat_node.dp else None

    parent_geocode = "nat_to_nat"

    # TODO: Maybe filtering out the detailed query from node.dp_queries can be done more neatly
    dp_queries_comb = stackNodeProperties([nat_node, ], lambda node: node.dp_queries,
                                          cons_dpq.StackedDPquery, lambda name: name != C.DETAILED)
    query_weights = map(lambda sdpq: 1. / sdpq.Var, dp_queries_comb)  # We can get actual variance for each query if we want
    constraints_comb = stackNodeProperties([nat_node, ], lambda node: node.cons, cons_dpq.StackedConstraint)

    # Create an L2PlusRounderWithBackup object
    seq_opt = sequential_optimizers.L2PlusRounderWithBackup(
        das=None, parent=parent_hist, parent_shape=parent_shape, NoisyChild=noisy_child,
        childGeoLen=1, config=config, DPqueries=dp_queries_comb, constraints=constraints_comb,
        NoisyChild_weight=noisy_child_weight, query_weights=query_weights,
        identifier="nat_to_nat", min_schema=min_schema, stat_node=nat_node)

    l2_answer, int_answer, backup_solve_status = seq_opt.run()

    # get rid of the extra child dimension
    int_answer = int_answer.squeeze()
    l2_answer = l2_answer.squeeze()

    nat_node.syn = int_answer
    constraintsCheck(nat_node, parent_geocode)

    nat_node.syn = sparse.multiSparse(int_answer)
    nat_node.syn_unrounded = sparse.multiSparse(l2_answer)
    return nat_node
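
# Illustrative note: in this single-node (national) case the histogram still gets a
# trailing "child" axis via np.expand_dims, so the optimizer sees the same layout as the
# multi-child case, and squeeze() removes it afterwards. A minimal round trip, with a
# hypothetical shape:
#
#   hist = np.zeros((4, 5))
#   as_child = np.expand_dims(hist, axis=len(hist.shape))   # shape (4, 5, 1)
#   back = as_child.squeeze()                               # shape (4, 5) again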
def geoimp_wrapper(*, config, parent_child_node, accum, optimizers, min_schema=None, keep_debug_info=False, aian=False):
    """
    This function performs the Post-Processing Step for a generic parent to the child geography.
    It is called from topdown_engine.py:topdown in a Spark map operation.
    It runs on the CORE and TASK nodes, not on the MASTER, so there is no das object!

    Inputs:
        config: configuration object
        parent_child_node: a (k,v) RDD with key being a geocode and value being a tuple of GeounitNode objects
            containing one parent and multiple children
        accum: spark accumulator object which tracks the number of solves that use the backup solve
        optimizers: which L2, Rounder and SequentialOptimizer to use
        min_schema: backup feasibility schema for the optimizer
        keep_debug_info: keep dp_queries and syn_unrounded in the optimized nodes; delete if False
        aian: if it's an AIAN spine, keep state total invariance on the
            US -> ({aian_parts_of_states} + {non_aian_parts_of_states}) optimization

    Output:
        children: a list of Node objects for each of the children, after post-processing
    """
    # Make sure that the logger is set up on all the nodes
    clogging.setup(level=logging.INFO)

    parent, children = findParentChildNodes(parent_child_node)

    parent_hist = parent.getDenseSyn(), parent.getDenseSynHousing()
    parent_geocode = parent.geocode
    parent_shape = tuple(h.shape for h in parent_hist)

    #######
    # under certain circumstances we can skip the gurobi optimization
    #######

    # Only 1 child
    if len(children) == 1:
        children = [children[0].copyParentSyn(parent, keep_debug_info)]
        return constraintsCheck(children, parent_geocode)

    # If the parent is empty (NOTE: all histograms should be empty. Also, the sum check obviously works only if values are non-negative)
    if parent.histsAreEmpty():
        children = [child.copyParentSyn(parent, keep_debug_info, zerosyn=True) for child in children]
        print(f"parent geocode {parent_geocode} is empty")
        return constraintsCheck(children, parent_geocode)

    #########
    # resume code for gurobi optimization
    ########
    children = [child.unzipNoisy() for child in children]

    children, backup_solve_status = makeInputsAndRunOptimizer(children, config, min_schema, parent_hist, parent_shape,
                                                              parent_geocode, optimizers,
                                                              keep_debug_info=keep_debug_info, aian=aian)

    if backup_solve_status is True:
        accum += 1

    return children
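
# Illustrative sketch of the two Gurobi-skipping cases above (a hypothetical helper, not
# called anywhere in the pipeline): with a single child the solution is simply a copy of
# the parent, and an empty parent forces empty children, since the non-negative child
# histograms must sum to the parent.
def _example_skip_cases(parent, n_children):
    if n_children == 1:
        return [parent.copy()]
    if parent.sum() == 0:
        return [np.zeros_like(parent) for _ in range(n_children)]
    return None  # otherwise the optimizer is required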
def getGurobiEnvironment(self, retries=C.GUROBI_LICENSE_MAX_RETRIES):
    """ Create a new license environment.

    IMPORTANT: THIS HAS TO BE A NEW ENVIRONMENT; DO NOT TRY TO RETURN ONE ALREADY HELD IN A PYTHON OBJECT.

    Input:
        config: config file.

    Output:
        environment object

    Notes:
        1. If config["ENVIRONMENT"] is "GAM", or if ISV_NAME is not set, create an environment
           using the public gb.Env() API, which typically uses the academic license.
        2. If a license cannot be obtained, retries with random exponential backoff.
    """
    # This appears to be the first function called in the python environment on each worker node.
    # Be sure the environment is properly set up.
    if self.gurobi_path and self.gurobi_path not in sys.path:
        sys.path.insert(0, self.gurobi_path)
    import gurobipy as gb

    # Syslog does not require the datetime because it is included automatically by the protocol,
    # but syslog does not include the year, so we manually add it.
    #
    # NOTE: yarn may not be running on the CORE and TASK nodes when the bootstrap is run,
    # so attempts to set the MASTER_IP on the core nodes sometimes failed. We avoid this now
    # by passing the MASTER_IP in the configuration environment.
    clogging.setup(level=logging.INFO, syslog=True,
                   syslog_address=(das_utils.getMasterIp(), C.SYSLOG_UDP),
                   syslog_format=clogging.YEAR + " " + clogging.SYSLOG_FORMAT)

    # THIS BELOW SHOULD NOT BE DONE, THERE'S A REASON FOR RE-CREATING THE ENVIRONMENT
    #
    # If we already have a grb_env, just return it.
    # if self.grb_env is not None:
    #     return self.grb_env

    os.environ[C.GRB_LICENSE_FILE] = self.getconfig(C.GUROBI_LIC)

    # Get environment variables
    cluster = self.getconfig(C.CLUSTER_OPTION, section=C.ENVIRONMENT, default=C.CENSUS_CLUSTER)
    logfile = self.getconfig(C.GUROBI_LOGFILE_NAME)
    isv_name = self.getconfig(C.GRB_ISV_NAME, section=C.ENVIRONMENT, default='')
    app_name = self.getconfig(C.GRB_APP_NAME, section=C.ENVIRONMENT, default='')

    for attempt in range(1, retries + 1):
        try:
            if (cluster == C.GAM_CLUSTER) or (isv_name == ''):
                # Use academic license
                env = gb.Env(logfile)
            else:
                # Use commercial license
                env3 = self.getint(C.GRB_ENV3, section=C.ENVIRONMENT)
                env4 = self.getconfig(C.GRB_ENV4, section=C.ENVIRONMENT).strip()
                env = gb.Env.OtherEnv(logfile, isv_name, app_name, env3, env4)
            logging.info("Acquired gurobi license on attempt %s", attempt)
            # We got the environment, so return it
            return env
        except gb.GurobiError as err:
            # If the environment is not obtained, wait some random time and try again
            # if the attempt number is still within range
            rand_wait = (C.GUROBI_LICENSE_RETRY_EXPONENTIAL_BASE ** (attempt - 1)
                         + np.random.uniform(0, C.GUROBI_LICENSE_RETRY_JITTER))
            logging.info("Failed to acquire gurobi license on attempt %s; waiting %s", attempt, rand_wait)
            logging.info("(Gurobi error %s)", str(err))
            time.sleep(rand_wait)

    # The attempt loop is over; we ran out of attempts
    raise RuntimeError("Could not acquire Gurobi license, see logfile for more info")
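
# Illustrative sketch (hypothetical helper, not called by the pipeline): unlike the
# earlier module-level getGurobiEnvironment, which scales a random base wait
# multiplicatively, this method waits base**(attempt-1) seconds plus additive uniform
# jitter. The defaults below are stand-ins for C.GUROBI_LICENSE_RETRY_EXPONENTIAL_BASE
# and C.GUROBI_LICENSE_RETRY_JITTER; the real values come from the constants module.
def _example_license_retry_waits(retries=4, base=2.0, jitter=1.0):
    """Return the waits, in seconds, slept after each failed license attempt."""
    return [base ** (attempt - 1) + np.random.uniform(0, jitter) for attempt in range(1, retries + 1)]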