def solve(instance, tolerance, beta):
    """Solve ``instance`` through the server's dynamic-relaxation request.

    The instance is first written to ``pythonInstance.xml`` via
    ``XMLInstanceManager.write_cmdp_instance``; afterwards the request
    ``solveXMLDomainMDP_dynamicrelaxation_<tolerance>|<beta>`` is sent.

    Args:
        instance: instance handed to ``write_cmdp_instance``.
        tolerance: value embedded in the request before the ``|``
            (its meaning is defined by the server -- not visible here).
        beta: value embedded in the request after the ``|``
            (its meaning is defined by the server -- not visible here).

    Returns:
        The server's reply, unchanged (treated by callers as the expected
        reward).

    Raises:
        SolveException: if the server replies with ``"EXCEPTION"``.
    """
    # The instance is exchanged with the server through this XML file.
    XMLInstanceManager.write_cmdp_instance(instance, "pythonInstance.xml")

    request = "solveXMLDomainMDP_dynamicrelaxation_{0}|{1}".format(
        tolerance, beta)
    reply = ToolboxServer.send_request(request)

    if reply != "EXCEPTION":
        return reply
    raise SolveException("Problem could not be solved by server")
Ejemplo n.º 2
0
def solve(instance):
    """Solve ``instance`` through the server's ``solveXMLDomainMDP_colgen`` request.

    The instance is first written to ``pythonInstance.xml`` via
    ``XMLInstanceManager.write_cmdp_instance``; the solve request is sent
    afterwards.

    Args:
        instance: instance handed to ``write_cmdp_instance``.

    Returns:
        The server's reply, unchanged (treated by callers as the expected
        reward).

    Raises:
        SolveException: if the server replies with ``"EXCEPTION"``.
    """
    # The instance is exchanged with the server through this XML file.
    XMLInstanceManager.write_cmdp_instance(instance, "pythonInstance.xml")

    reply = ToolboxServer.send_request("solveXMLDomainMDP_colgen")

    if reply != "EXCEPTION":
        return reply
    raise SolveException("Problem could not be solved by server")
    def run(self, num_runs):
        """Simulate the instance ``num_runs`` times and return the mean reward.

        Every run starts each agent in its CMDP's ``initial_state``. At each
        decision step the server is asked (``getActionsCMDP_<t>_<state>``)
        for one action per agent, rewards and resource costs are accumulated,
        and each agent's next state is drawn with ``ProbabilitySample``.

        Besides the return value, these attributes are (re)written:

        * ``mean_total_cost[k]`` -- mean over runs of the total cost of
          resource ``k``.
        * ``mean_instantaneous_cost[k][t]`` -- mean over runs of the cost of
          resource ``k`` at step ``t``.
        * ``violation_prob_estimate_total[k]`` -- fraction of runs whose
          total cost of resource ``k`` exceeded ``get_cost_limit_budget(k)``;
          only counted while ``instance.use_budget_constraints`` is truthy.
        * ``violation_prob_estimate_instantaneous[k][t]`` -- fraction of runs
          whose cost of resource ``k`` at step ``t`` exceeded
          ``get_cost_limit_instantaneous(k, t)``; only counted while
          ``instance.use_budget_constraints`` is falsy.

        Args:
            num_runs: number of simulation runs; must be positive.

        Returns:
            The mean over all runs of the summed per-step rewards.

        Raises:
            ValueError: if ``num_runs`` is not positive.
        """
        # Every statistic below is divided by num_runs, so reject invalid
        # values before the server is put into simulation mode.
        if num_runs <= 0:
            raise ValueError(
                "num_runs must be positive, got {0}".format(num_runs))

        ToolboxServer.send_request("startSimulation")

        instance = self.instance
        cmdps = instance.cmdps
        num_agents = instance.get_num_agents()
        num_decisions = instance.num_decisions
        num_domain_resources = instance.num_domain_resources

        mean_reward = 0.0
        self.mean_total_cost = [0.0] * num_domain_resources
        self.mean_instantaneous_cost = [[0.0] * num_decisions
                                        for _ in range(num_domain_resources)]
        num_total_violations = [0] * num_domain_resources
        num_instantaneous_violations = [[0] * num_decisions
                                        for _ in range(num_domain_resources)]

        for _ in range(num_runs):
            run_reward = 0.0
            run_total_cost = [0.0] * num_domain_resources
            run_instantaneous_cost = [[0.0] * num_decisions
                                      for _ in range(num_domain_resources)]

            # get initial states
            state = [cmdps[i].initial_state for i in range(num_agents)]

            for t in range(num_decisions):
                # ask server for actions and convert to list with ints
                reply = ToolboxServer.send_request(
                    "getActionsCMDP_{0}_{1}".format(t, state))
                actions = [int(token) for token in reply.split()]

                # execute the actions and sample next states
                for i in range(num_agents):
                    cmdp = cmdps[i]
                    s = state[i]
                    a = actions[i]

                    run_reward += cmdp.get_time_reward(t, s, a)
                    for k in range(num_domain_resources):
                        # one lookup feeds both the total and the per-step sum
                        cost = cmdp.get_cost(k, s, a)
                        run_total_cost[k] += cost
                        run_instantaneous_cost[k][t] += cost

                    destinations = cmdp.get_time_transition_destinations(
                        t, s, a)
                    probabilities = cmdp.get_time_transition_probabilities(
                        t, s, a)

                    ps = ProbabilitySample(destinations, probabilities)
                    state[i] = ps.sample_item()

            # fold this run into the running means and violation counters
            mean_reward += run_reward / num_runs
            for k in range(num_domain_resources):
                self.mean_total_cost[k] += run_total_cost[k] / num_runs

                if (instance.use_budget_constraints and
                        run_total_cost[k] > instance.get_cost_limit_budget(k)):
                    num_total_violations[k] += 1

                for t in range(num_decisions):
                    step_cost = run_instantaneous_cost[k][t]
                    self.mean_instantaneous_cost[k][t] += step_cost / num_runs
                    if (not instance.use_budget_constraints and
                            step_cost > instance.get_cost_limit_instantaneous(
                                k, t)):
                        num_instantaneous_violations[k][t] += 1

        # compute violation probability estimates
        self.violation_prob_estimate_total = [
            count / num_runs for count in num_total_violations
        ]
        self.violation_prob_estimate_instantaneous = [
            [count / num_runs for count in per_step]
            for per_step in num_instantaneous_violations
        ]

        return mean_reward
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#################################################################################

from evaluation.CMDPSimulator import CMDPSimulator
from util.ToolboxServer import ToolboxServer
from instances import InstanceGenerator
from algorithms.mdp.constrainedmdp import ConstrainedMDPFiniteHorizon
from algorithms.SolveException import SolveException
from util.SolutionManager import SolutionManager

# Connect to the toolbox server first; the solve and simulation steps below
# presumably talk to it -- verify against ToolboxServer.
ToolboxServer.connect()

# get instance: the advertising domain with 2 agents and 7 decisions
num_agents = 2
num_decisions = 7
instance = InstanceGenerator.get_advertising_instance(num_agents,
                                                      num_decisions)

try:
    # solve instance
    expected_reward = ConstrainedMDPFiniteHorizon.solve(instance)
    print("Expected reward:", expected_reward)
    print("Cost limit: ", instance.get_cost_limit_budget(0))

    # evaluate instance
    sim = CMDPSimulator(instance)
Ejemplo n.º 5
0
 def writeCMDPSolution(filename):
     """Send the ``writeCMDPSolution_<filename>`` request to the toolbox server.

     The server's reply is discarded and nothing is returned.
     """
     ToolboxServer.send_request("writeCMDPSolution_" + filename)
Ejemplo n.º 6
0
 def readCPOMDPSolution(filename):
     """Send the ``readCPOMDPSolution_<filename>`` request to the toolbox server.

     The server's reply is discarded and nothing is returned.
     """
     ToolboxServer.send_request("readCPOMDPSolution_" + filename)
def get_webad_instance(num_agents, num_decisions):
    """Fetch the server's default ``webad`` domain as a CPOMDP instance.

    Sends ``dumpDefaultDomain_webad_<num_agents>_<num_decisions>`` and then
    reads ``javaInstance.xml`` (presumably written by the server in response
    -- verify) with ``XMLInstanceManager.read_cpomdp_instance``.

    Args:
        num_agents: first value embedded in the request.
        num_decisions: second value embedded in the request.

    Returns:
        Whatever ``read_cpomdp_instance`` returns for ``javaInstance.xml``.
    """
    request = "dumpDefaultDomain_webad_{0}_{1}".format(
        num_agents, num_decisions)
    ToolboxServer.send_request(request)
    return XMLInstanceManager.read_cpomdp_instance("javaInstance.xml")
def get_tcl_multi_level_instance(num_agents, num_decisions):
    """Fetch the server's default ``tclMultiLevel`` domain as a CMDP instance.

    Sends ``dumpDefaultDomain_tclMultiLevel_<num_agents>_<num_decisions>``
    and then reads ``javaInstance.xml`` (presumably written by the server in
    response -- verify) with ``XMLInstanceManager.read_cmdp_instance``.

    Args:
        num_agents: first value embedded in the request.
        num_decisions: second value embedded in the request.

    Returns:
        Whatever ``read_cmdp_instance`` returns for ``javaInstance.xml``.
    """
    request = "dumpDefaultDomain_tclMultiLevel_{0}_{1}".format(
        num_agents, num_decisions)
    ToolboxServer.send_request(request)
    return XMLInstanceManager.read_cmdp_instance("javaInstance.xml")
def get_tcl_fixed_limit_instance(num_agents, num_decisions):
    """Fetch the server's default ``tclFixedLimit`` domain as a CMDP instance.

    Sends ``dumpDefaultDomain_tclFixedLimit_<num_agents>_<num_decisions>``
    and then reads ``javaInstance.xml`` (presumably written by the server in
    response -- verify) with ``XMLInstanceManager.read_cmdp_instance``.

    Args:
        num_agents: first value embedded in the request.
        num_decisions: second value embedded in the request.

    Returns:
        Whatever ``read_cmdp_instance`` returns for ``javaInstance.xml``.
    """
    request = "dumpDefaultDomain_tclFixedLimit_{0}_{1}".format(
        num_agents, num_decisions)
    ToolboxServer.send_request(request)
    return XMLInstanceManager.read_cmdp_instance("javaInstance.xml")