def add_reward_constrs(self):
    """Append the elevator reward cliques to ``self.constrs['reward']``.

    For every future ``k`` and step ``h``:

    * per elevator, two pairwise cliques are added, both containing the
      elevator's "direction up" state: one paired with the "person in
      elevator going up" state, the other with the "going down" state;
    * per floor, one pairwise clique over the "person waiting up" and
      "person waiting down" states is added.

    Table entries are ``1`` or ``exp(-penalty)`` values.
    NOTE(review): the mapping from table position to variable assignment
    is defined by ``MRFClique`` -- confirm the bit order there.
    """
    penalty_right = self.inst_params['ELEVATOR-PENALTY-RIGHT-DIR']
    penalty_wrong = self.inst_params['ELEVATOR-PENALTY-WRONG-DIR']

    def pair_clique(first_state, second_state, future, step):
        # Clique over two state variables taken at the same (future, step).
        return MRFClique([
            self.var_to_idx[first_state, future, step],
            self.var_to_idx[second_state, future, step],
        ])

    for future in range(self.num_futures):
        for step in range(self.problem.horizon):
            for elev in self.inst_params['elevators']:
                heading_up = self.get_elevator_dir_state(elev, 'up')
                rider_up = self.get_person_in_elevator_state(elev, 'up')
                rider_down = self.get_person_in_elevator_state(elev, 'down')

                # Direction vs. passengers who want to go up.
                up_clique = pair_clique(heading_up, rider_up, future, step)
                up_clique.function_table = [
                    1,
                    1,
                    math.exp(-penalty_wrong),
                    math.exp(-penalty_right),
                ]
                self.constrs['reward'].append(up_clique)

                # Direction vs. passengers who want to go down: the two
                # penalty entries swap places.
                down_clique = pair_clique(heading_up, rider_down, future, step)
                down_clique.function_table = [
                    1,
                    1,
                    math.exp(-penalty_right),
                    math.exp(-penalty_wrong),
                ]
                self.constrs['reward'].append(down_clique)

            for floor in self.inst_params['floors']:
                waiting_up = self.get_person_waiting_state(floor, 'up')
                waiting_down = self.get_person_waiting_state(floor, 'down')
                floor_clique = pair_clique(waiting_up, waiting_down, future, step)
                # exp(0), exp(-1), exp(-1), exp(-2) over the four joint
                # assignments of the two waiting states.
                floor_clique.function_table = [
                    math.exp(exponent) for exponent in [0, -1, -1, -2]
                ]
                self.constrs['reward'].append(floor_clique)

    logger.info('Added reward constraints')
def add_reward_constrs(self):
    """Append unary reward cliques to ``self.constrs['reward']``.

    For every future ``k`` and step ``h`` this adds:

    * one clique over ``self.goal`` with table ``[exp(-1), exp(0)]``;
    * one clique per problem variable ``v`` with table
      ``[exp(0), exp(-self.get_disappear_prob(v))]``.
    """
    for future in range(self.num_futures):
        for step in range(self.problem.horizon):
            goal_clique = MRFClique([self.var_to_idx[(self.goal, future, step)]])
            goal_clique.function_table = [
                math.exp(exponent) for exponent in [-1, 0]
            ]
            self.constrs['reward'].append(goal_clique)

            for var in self.problem.variables:
                var_clique = MRFClique([self.var_to_idx[(var, future, step)]])
                exponents = [0, -self.get_disappear_prob(var)]
                var_clique.function_table = [
                    math.exp(exponent) for exponent in exponents
                ]
                self.constrs['reward'].append(var_clique)

    logger.info('Added reward constraints')
def set_transition_constrs(self):
    """Rebuild ``self.constrs['transition']`` from freshly sampled outcomes.

    For every future ``k``, step ``h >= 1`` and variable ``v`` one clique is
    created over: the matching action of ``v`` at ``h - 1``, ``v`` at ``h``,
    ``v`` at ``h - 1`` and the neighbours of ``v`` at ``h - 1``.

    Each assignment of the previous-step variables gets exactly one outcome
    drawn with ``random.random()``.  Table entries whose ``v(h)`` bit equals
    that outcome are ``1``; all others are ``mrf.INVALID_POTENTIAL_VAL``.

    The outcome is 1 with probability ``1 - self.noise_probs[v]`` when the
    cell was alive with 2 or 3 set neighbours, was dead with exactly 3 set
    neighbours, or the action bit is set; otherwise it is 1 with probability
    ``self.noise_probs[v]``.
    """
    self.constrs['transition'] = []
    for k in range(self.num_futures):
        for h in range(1, self.problem.horizon):
            for v in self.problem.variables:
                # Clique: MSB[n1(h-1), n2(h-1),..., v(h-1), v(h), set(v, h-1)]LSB
                neighbours = self.neighbours[v]
                var_indices = [
                    self.var_to_idx[(self.find_matching_action(v), k, h - 1)],
                    self.var_to_idx[(v, k, h)],
                    self.var_to_idx[(v, k, h - 1)],
                ]
                var_indices.extend(
                    [self.var_to_idx[(n, k, h - 1)] for n in neighbours])
                num_vars = len(var_indices)

                # One sampled outcome per assignment of the previous-step
                # variables, so both values of v(h) are judged against the
                # same draw.
                determinized_transition = {}
                clique = MRFClique(var_indices)
                clique.function_table = []
                for clique_bitmask in range(2**num_vars):
                    # Remove bit 1 (v(h)) from the key: the outcome depends
                    # only on the action and the previous-step state.
                    dependencies_bitmask = (
                        (clique_bitmask >> 2) << 1) | (clique_bitmask & 1)
                    if dependencies_bitmask in determinized_transition:
                        determinized_val = determinized_transition[
                            dependencies_bitmask]
                    else:
                        is_set = (clique_bitmask & 1) != 0
                        num_set_neighbours = self.count_set_neighbours(
                            clique_bitmask, len(neighbours))
                        # Bit 2 is v(h-1), the previous state of the cell.
                        # Bit 1 is v(h) and must not be used here: it is
                        # the value being constrained, and it is always 0
                        # the first time a dependency key is seen.
                        is_alive = (clique_bitmask & 4) != 0
                        rule_fires = (
                            (is_alive and num_set_neighbours in (2, 3))
                            or (not is_alive and num_set_neighbours == 3)
                            or is_set
                        )
                        if rule_fires:
                            on_prob = 1. - self.noise_probs[v]
                        else:
                            on_prob = self.noise_probs[v]
                        determinized_val = (
                            1 if random.random() <= on_prob else 0)
                        determinized_transition[
                            dependencies_bitmask] = determinized_val

                    # Only assignments whose v(h) bit matches the sampled
                    # outcome are allowed.
                    if (clique_bitmask & 2) >> 1 == determinized_val:
                        clique.function_table.append(1)
                    else:
                        clique.function_table.append(
                            mrf.INVALID_POTENTIAL_VAL)
                self.constrs['transition'].append(clique)
    logger.info('set_transition_constraints')
def set_init_states_constrs(self, init_state_vals):
    """Rebuild ``self.constrs['init_states']`` from the given initial values.

    For every future ``k`` and problem variable ``v`` a unary clique over
    ``(v, k, 0)`` is created.  Its table is
    ``[1, mrf.INVALID_POTENTIAL_VAL_2]`` when ``init_state_vals[v] == 0``
    and ``[mrf.INVALID_POTENTIAL_VAL_2, 1]`` otherwise.

    Args:
        init_state_vals: mapping indexed by problem variable; a value equal
            to 0 selects the first table, anything else the second.
    """
    self.constrs['init_states'] = []

    # The two tables are built once and shared by all cliques.
    table_when_zero = [1, mrf.INVALID_POTENTIAL_VAL_2]
    table_when_nonzero = [mrf.INVALID_POTENTIAL_VAL_2, 1]

    for future in range(self.num_futures):
        for var in self.problem.variables:
            clique = MRFClique([self.var_to_idx[(var, future, 0)]])
            clique.function_table = (
                table_when_zero
                if init_state_vals[var] == 0
                else table_when_nonzero
            )
            self.constrs['init_states'].append(clique)

    logger.info('set_init_states_constraints')
def add_reward_constrs(self):
    """Append one unary reward clique per ``y`` to ``self.constrs['reward']``.

    For every future ``k``, step ``h`` and entry ``y`` of
    ``self.inst_params['ys']`` a clique over ``(y, k, h)`` is added with
    table ``[exp(0), exp(1)]``.
    """
    for future in range(self.num_futures):
        for step in range(self.problem.horizon):
            for y_var in self.inst_params['ys']:
                unary = MRFClique([self.var_to_idx[y_var, future, step]])
                unary.function_table = [
                    math.exp(exponent) for exponent in [0, 1]
                ]
                self.constrs['reward'].append(unary)

    logger.info('Added reward constraints')
def add_reward_constrs(self):
    """Append variable/action reward cliques to ``self.constrs['reward']``.

    For every future ``k``, step ``h`` and problem variable ``v`` a pairwise
    clique over ``v`` and its matching action (both at ``(k, h)``) is
    added.  All cliques share one table: ``exp`` of ``[0, 1, -1, 0]``.
    NOTE(review): the mapping from table position to variable assignment
    is defined by ``MRFClique`` -- confirm the bit order there.
    """
    # Built once; every clique references this same list object.
    shared_table = [math.exp(exponent) for exponent in [0, 1, -1, 0]]

    for future in range(self.num_futures):
        for step in range(self.problem.horizon):
            for var in self.problem.variables:
                action = self.find_matching_action(var)
                pair = MRFClique([
                    self.var_to_idx[(var, future, step)],
                    self.var_to_idx[(action, future, step)],
                ])
                pair.function_table = shared_table
                self.constrs['reward'].append(pair)

    logger.info('Added reward constraints')
def add_reward_cliques(self, reward_tree, tree_vars):
    """Append one reward clique per (future, step) to ``self.cliques['reward']``.

    The clique for ``(0, 0)`` is built first and its function table is
    generated from ``reward_tree``.  Every other ``(k, t)`` clique reuses
    that same table object instead of regenerating it.

    Args:
        reward_tree: passed to ``generate_reward_function_table``.
        tree_vars: variables resolved with ``state_vars_to_indices``; also
            passed to ``generate_reward_function_table``.
    """
    assert(self.num_futures > 0 and self.problem.horizon > 0)

    def clique_at(future, step):
        # State variables of the tree followed by the reward variable.
        indices = self.state_vars_to_indices(tree_vars, future, step)
        indices.append(self.get_reward_var_index(future, step))
        return MRFClique(indices)

    prototype = clique_at(0, 0)
    prototype.generate_reward_function_table(reward_tree, tree_vars)
    self.cliques['reward'].append(prototype)

    for future in range(self.num_futures):
        for step in range(self.problem.horizon):
            if (future, step) == (0, 0):
                # Already added above as the prototype.
                continue
            twin = clique_at(future, step)
            twin.function_table = prototype.function_table
            self.cliques['reward'].append(twin)
def add_init_actions_cliques(self):
    """Append one clique per action to ``self.cliques['init_actions']``.

    Each clique spans the step-0 variable of one action in every future.
    The shared table holds ``1`` for the all-zero and all-one assignments
    and ``INVALID_POTENTIAL_VAL`` for every other assignment.
    """
    num_assignments = 2**self.num_futures
    all_set = num_assignments - 1
    shared_table = [
        1 if mask in (0, all_set) else INVALID_POTENTIAL_VAL
        for mask in range(num_assignments)
    ]

    for action in self.problem.actions:
        per_future_indices = []
        for future in range(self.num_futures):
            per_future_indices.append(
                self.get_state_var_index(action, future, 0))
        clique = MRFClique(per_future_indices)
        clique.function_table = shared_table
        self.cliques['init_actions'].append(clique)

    num_added = len(self.cliques['init_actions'])
    logger.info(
        'added_init_actions_cliques|#init_actions_cliques={}'.format(num_added))
def add_concurrency_constrs(self):
    """Append one concurrency clique per (future, step) to ``self.cliques``.

    Each clique spans all action variables at ``(k, t)``.  The shared table
    holds ``INVALID_POTENTIAL_VAL`` for every assignment whose number of
    set bits exceeds ``self.problem.max_concurrency`` and ``1`` otherwise.
    """
    shared_table = [
        INVALID_POTENTIAL_VAL
        if utils.count_set_bits(mask) > self.problem.max_concurrency
        else 1
        for mask in range(2**len(self.problem.actions))
    ]

    for future in range(self.num_futures):
        for step in range(self.problem.horizon):
            action_indices = self.state_vars_to_indices(
                self.problem.actions, future, step)
            clique = MRFClique(action_indices)
            clique.function_table = shared_table
            self.cliques['concurrency'].append(clique)

    num_added = len(self.cliques['concurrency'])
    logger.info(
        'added_concurrency_constrs|#concurrency_constrs_cliques={}'.format(num_added))
def add_init_actions_constrs(self):
    """Append one clique per action to ``self.constrs['init_actions']``.

    Each clique spans the step-0 variable of one action in every future.
    The shared table holds ``1`` for the all-zero and all-one assignments
    and ``mrf.INVALID_POTENTIAL_VAL`` for every other assignment.
    """
    num_assignments = 2**self.num_futures
    all_set = num_assignments - 1
    shared_table = [
        1 if mask in (0, all_set) else mrf.INVALID_POTENTIAL_VAL
        for mask in range(num_assignments)
    ]

    for action in self.problem.actions:
        per_future_indices = []
        for future in range(self.num_futures):
            per_future_indices.append(self.var_to_idx[(action, future, 0)])
        clique = MRFClique(per_future_indices)
        clique.function_table = shared_table
        self.constrs['init_actions'].append(clique)

    logger.info('Added init actions constraints')
def add_concurrency_constrs(self):
    """Append one concurrency clique per (future, step) to ``self.constrs``.

    Each clique spans all action variables at ``(k, h)``.  The shared table
    holds ``mrf.INVALID_POTENTIAL_VAL_2`` for every assignment whose number
    of set bits exceeds ``self.problem.max_concurrency`` and ``1``
    otherwise.
    """
    shared_table = [
        mrf.INVALID_POTENTIAL_VAL_2
        if utils.count_set_bits(mask) > self.problem.max_concurrency
        else 1
        for mask in range(2**len(self.problem.actions))
    ]

    for future in range(self.num_futures):
        for step in range(self.problem.horizon):
            action_indices = []
            for action in self.problem.actions:
                action_indices.append(self.var_to_idx[(action, future, step)])
            clique = MRFClique(action_indices)
            clique.function_table = shared_table
            self.constrs['concurrency'].append(clique)

    logger.info('Added concurrency constraints')
def set_transition_constrs(self):
    """Rebuild ``self.constrs['transition']`` from freshly sampled outcomes.

    For every future, step ``h >= 1`` and variable ``v`` one clique is
    created over: the matching (reboot) action of ``v`` at ``h - 1``,
    ``v`` at ``h``, ``v`` at ``h - 1`` and the topology neighbours of
    ``v`` at ``h - 1``.

    * Reboot bit set: only assignments with ``v(h) = 1`` are valid.
    * Reboot bit clear: one outcome is sampled per assignment of the
      previous-step variables, and only assignments whose ``v(h)`` bit
      equals it are valid.  If ``v(h-1)`` is set the outcome is 1 with
      probability ``0.45 + 0.5 * (1 + running) / (1 + len(neighbours))``,
      where ``running`` comes from ``self.count_set_neighbours``;
      otherwise with probability ``self.REBOOT_PROB``.

    Valid entries are ``1``; invalid ones ``mrf.INVALID_POTENTIAL_VAL``.
    """
    self.constrs['transition'] = []
    for future in range(self.num_futures):
        for step in range(1, self.problem.horizon):
            prev_step = step - 1
            for var in self.problem.variables:
                # Clique: MSB[n1(h-1), n2(h-1),..., v(h-1), v(h), reboot(v, h-1)]LSB
                peers = self.topology[var]
                clique_vars = [
                    self.var_to_idx[(self.find_matching_action(var), future, prev_step)],
                    self.var_to_idx[(var, future, step)],
                    self.var_to_idx[(var, future, prev_step)],
                ]
                clique_vars += [
                    self.var_to_idx[(peer, future, prev_step)] for peer in peers
                ]
                table_size = 2**len(clique_vars)

                # Sampled outcome per assignment of the previous-step bits.
                sampled_outcome = {}
                clique = MRFClique(clique_vars)
                clique.function_table = []
                for mask in range(table_size):
                    next_state_bit = (mask & 2) >> 1

                    if mask & 1:
                        # Reboot requested: the next state must be 1.
                        clique.function_table.append(
                            1 if next_state_bit else mrf.INVALID_POTENTIAL_VAL)
                        continue

                    # Key on everything above v(h); the reboot bit is 0 here.
                    dependency_key = mask >> 2
                    if dependency_key not in sampled_outcome:
                        if mask & 4:
                            # v(h-1) is set.
                            running_peers = self.count_set_neighbours(
                                mask, len(peers))
                            stay_up_prob = 0.45 + 0.5 * (
                                1 + running_peers) / (1 + len(peers))
                            outcome = int(random.random() <= stay_up_prob)
                        else:
                            outcome = int(random.random() <= self.REBOOT_PROB)
                        sampled_outcome[dependency_key] = outcome
                    outcome = sampled_outcome[dependency_key]

                    clique.function_table.append(
                        1 if next_state_bit == outcome
                        else mrf.INVALID_POTENTIAL_VAL)
                self.constrs['transition'].append(clique)
    logger.info('set_transition_constraints')