def solve(instance, tolerance, beta):
    """Solve ``instance`` through the toolbox server (dynamic relaxation).

    The instance is first written to ``pythonInstance.xml`` with
    ``XMLInstanceManager.write_cmdp_instance``; afterwards the request
    ``solveXMLDomainMDP_dynamicrelaxation_<tolerance>|<beta>`` is sent to
    the server.

    Args:
        instance: the instance handed to ``write_cmdp_instance``.
        tolerance: value substituted into the first slot of the request.
        beta: value substituted into the second slot of the request.

    Returns:
        The server reply, unmodified (named ``expected_reward`` by the
        original author; its exact type depends on ``send_request``).

    Raises:
        SolveException: if the server replies with ``"EXCEPTION"``.
    """
    XMLInstanceManager.write_cmdp_instance(instance, "pythonInstance.xml")

    request = "solveXMLDomainMDP_dynamicrelaxation_{0}|{1}".format(
        tolerance, beta)
    reply = ToolboxServer.send_request(request)

    # Anything other than the failure marker is passed back untouched.
    if reply != "EXCEPTION":
        return reply
    raise SolveException("Problem could not be solved by server")
def solve(instance):
    """Solve ``instance`` through the toolbox server (``colgen`` request).

    The instance is first written to ``pythonInstance.xml`` with
    ``XMLInstanceManager.write_cmdp_instance``; afterwards the request
    ``solveXMLDomainMDP_colgen`` is sent to the server.

    Args:
        instance: the instance handed to ``write_cmdp_instance``.

    Returns:
        The server reply, unmodified (named ``expected_reward`` by the
        original author; its exact type depends on ``send_request``).

    Raises:
        SolveException: if the server replies with ``"EXCEPTION"``.
    """
    XMLInstanceManager.write_cmdp_instance(instance, "pythonInstance.xml")

    reply = ToolboxServer.send_request("solveXMLDomainMDP_colgen")

    # Anything other than the failure marker is passed back untouched.
    if reply != "EXCEPTION":
        return reply
    raise SolveException("Problem could not be solved by server")
def run(self, num_runs):
    """Simulate the instance ``num_runs`` times and return the mean reward.

    After sending ``startSimulation`` to the toolbox server, every run
    starts each agent in its ``initial_state``. At every decision step the
    server is asked for the joint action (``getActionsCMDP_<t>_<state>``),
    the reward and the per-resource cost of every agent are accumulated,
    and every agent's next state is drawn with ``ProbabilitySample``.

    Besides the return value, the following attributes are (re)set:

    * ``self.mean_total_cost[k]`` -- mean over runs of the cost of
      resource ``k`` summed over all steps and agents.
    * ``self.mean_instantaneous_cost[k][t]`` -- mean over runs of the cost
      of resource ``k`` at step ``t`` summed over agents.
    * ``self.violation_prob_estimate_total[k]`` -- fraction of runs whose
      total cost exceeded ``get_cost_limit_budget(k)``; only counted when
      ``use_budget_constraints`` is true, otherwise it stays 0.
    * ``self.violation_prob_estimate_instantaneous[k][t]`` -- fraction of
      runs whose step cost exceeded ``get_cost_limit_instantaneous(k, t)``;
      only counted when ``use_budget_constraints`` is false.

    Args:
        num_runs: number of simulation runs; must be at least 1.

    Returns:
        The reward summed over steps and agents, averaged over the runs.

    Raises:
        ValueError: if ``num_runs`` is smaller than 1. Every statistic is
            divided by ``num_runs``, so zero would fail with a
            ZeroDivisionError only after the server had already been told
            to start, and negative values would yield meaningless results.
    """
    if num_runs < 1:
        raise ValueError(
            "num_runs must be at least 1, got {0}".format(num_runs))

    ToolboxServer.send_request("startSimulation")

    instance = self.instance
    cmdps = instance.cmdps
    num_agents = instance.get_num_agents()
    num_decisions = instance.num_decisions
    num_domain_resources = instance.num_domain_resources
    use_budget = instance.use_budget_constraints

    mean_reward = 0.0
    self.mean_total_cost = [0.0] * num_domain_resources
    self.mean_instantaneous_cost = [
        [0.0] * num_decisions for _ in range(num_domain_resources)
    ]
    num_total_violations = [0] * num_domain_resources
    num_instantaneous_violations = [
        [0] * num_decisions for _ in range(num_domain_resources)
    ]

    for _ in range(num_runs):
        run_reward = 0.0
        run_total_cost = [0.0] * num_domain_resources
        run_instantaneous_cost = [
            [0.0] * num_decisions for _ in range(num_domain_resources)
        ]

        # Joint state, one entry per agent. It has to stay a list: its
        # string form is embedded verbatim in the request below.
        state = [cmdps[i].initial_state for i in range(num_agents)]

        for t in range(num_decisions):
            # The reply is a whitespace-separated list of integer actions.
            reply = ToolboxServer.send_request(
                "getActionsCMDP_{0}_{1}".format(t, state))
            actions = [int(token) for token in reply.split()]

            # Execute the actions and sample the next state of each agent.
            for i in range(num_agents):
                cmdp = cmdps[i]
                s = state[i]
                a = actions[i]

                run_reward += cmdp.get_time_reward(t, s, a)

                for k in range(num_domain_resources):
                    # Looked up once; feeds both the total and the
                    # per-step accumulator.
                    cost = cmdp.get_cost(k, s, a)
                    run_total_cost[k] += cost
                    run_instantaneous_cost[k][t] += cost

                destinations = cmdp.get_time_transition_destinations(
                    t, s, a)
                probabilities = cmdp.get_time_transition_probabilities(
                    t, s, a)
                state[i] = ProbabilitySample(
                    destinations, probabilities).sample_item()

        # Fold this run into the running means and violation counters.
        mean_reward += run_reward / num_runs
        for k in range(num_domain_resources):
            self.mean_total_cost[k] += run_total_cost[k] / num_runs
            if (use_budget
                    and run_total_cost[k]
                    > instance.get_cost_limit_budget(k)):
                num_total_violations[k] += 1

            for t in range(num_decisions):
                step_cost = run_instantaneous_cost[k][t]
                self.mean_instantaneous_cost[k][t] += step_cost / num_runs
                if (not use_budget
                        and step_cost
                        > instance.get_cost_limit_instantaneous(k, t)):
                    num_instantaneous_violations[k][t] += 1

    # Violation probability estimates: violating runs / all runs.
    self.violation_prob_estimate_total = [
        count / num_runs for count in num_total_violations
    ]
    self.violation_prob_estimate_instantaneous = [
        [count / num_runs for count in per_step]
        for per_step in num_instantaneous_violations
    ]

    return mean_reward
# but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. ################################################################################# from evaluation.CMDPSimulator import CMDPSimulator from util.ToolboxServer import ToolboxServer from instances import InstanceGenerator from algorithms.mdp.constrainedmdp import ConstrainedMDPFiniteHorizon from algorithms.SolveException import SolveException from util.SolutionManager import SolutionManager ToolboxServer.connect() # get instance num_agents = 2 num_decisions = 7 instance = InstanceGenerator.get_advertising_instance(num_agents, num_decisions) try: # solve instance expected_reward = ConstrainedMDPFiniteHorizon.solve(instance) print("Expected reward:", expected_reward) print("Cost limit: ", instance.get_cost_limit_budget(0)) # evaluate instance sim = CMDPSimulator(instance)
def writeCMDPSolution(filename):
    """Send a ``writeCMDPSolution_<filename>`` request to the toolbox server.

    Args:
        filename: string appended verbatim to the command prefix.

    Returns:
        None; the server reply is discarded.
    """
    ToolboxServer.send_request("writeCMDPSolution_" + filename)
def readCPOMDPSolution(filename):
    """Send a ``readCPOMDPSolution_<filename>`` request to the toolbox server.

    Args:
        filename: string appended verbatim to the command prefix.

    Returns:
        None; the server reply is discarded.
    """
    ToolboxServer.send_request("readCPOMDPSolution_" + filename)
def get_webad_instance(num_agents, num_decisions):
    """Request the default ``webad`` domain and load it as a CPOMDP instance.

    Sends ``dumpDefaultDomain_webad_<num_agents>_<num_decisions>`` to the
    toolbox server and then reads ``javaInstance.xml`` (presumably the
    file the server dumps in response -- confirm against the server).

    Args:
        num_agents: value placed in the first slot of the request.
        num_decisions: value placed in the second slot of the request.

    Returns:
        Whatever ``XMLInstanceManager.read_cpomdp_instance`` returns for
        ``javaInstance.xml``.
    """
    request = "dumpDefaultDomain_webad_{0}_{1}".format(
        num_agents, num_decisions)
    ToolboxServer.send_request(request)
    return XMLInstanceManager.read_cpomdp_instance("javaInstance.xml")
def get_tcl_multi_level_instance(num_agents, num_decisions):
    """Request the default ``tclMultiLevel`` domain and load it as a CMDP instance.

    Sends ``dumpDefaultDomain_tclMultiLevel_<num_agents>_<num_decisions>``
    to the toolbox server and then reads ``javaInstance.xml`` (presumably
    the file the server dumps in response -- confirm against the server).

    Args:
        num_agents: value placed in the first slot of the request.
        num_decisions: value placed in the second slot of the request.

    Returns:
        Whatever ``XMLInstanceManager.read_cmdp_instance`` returns for
        ``javaInstance.xml``.
    """
    request = "dumpDefaultDomain_tclMultiLevel_{0}_{1}".format(
        num_agents, num_decisions)
    ToolboxServer.send_request(request)
    return XMLInstanceManager.read_cmdp_instance("javaInstance.xml")
def get_tcl_fixed_limit_instance(num_agents, num_decisions):
    """Request the default ``tclFixedLimit`` domain and load it as a CMDP instance.

    Sends ``dumpDefaultDomain_tclFixedLimit_<num_agents>_<num_decisions>``
    to the toolbox server and then reads ``javaInstance.xml`` (presumably
    the file the server dumps in response -- confirm against the server).

    Args:
        num_agents: value placed in the first slot of the request.
        num_decisions: value placed in the second slot of the request.

    Returns:
        Whatever ``XMLInstanceManager.read_cmdp_instance`` returns for
        ``javaInstance.xml``.
    """
    request = "dumpDefaultDomain_tclFixedLimit_{0}_{1}".format(
        num_agents, num_decisions)
    ToolboxServer.send_request(request)
    return XMLInstanceManager.read_cmdp_instance("javaInstance.xml")