Exemple #1
0
    def getSimulator(self, simulator):
        """Return a freshly started simulator instance for ``simulator``.

        Args:
            simulator: name of the domain; one of "logistics", "pong",
                "tetris", "wumpus", "blocks", "blackjack", "50chain" or
                "net_admin".

        Returns:
            A new instance of the matching simulator class, constructed
            with ``start=True``.

        Raises:
            NameError: if ``simulator`` is not one of the supported names.
                The exception message names the rejected value.
        """
        # Each entry is a zero-argument factory so that a simulator class is
        # only looked up when it is actually requested.
        factories = {
            "logistics": lambda: Logistics(start=True),
            "pong": lambda: Pong(start=True),
            "tetris": lambda: Tetris(start=True),
            "wumpus": lambda: Wumpus(start=True),
            "blocks": lambda: Blocks_world(start=True),
            "blackjack": lambda: Game(start=True),
            "50chain": lambda: Chain(start=True),
            "net_admin": lambda: Admin(start=True),
        }
        factory = factories.get(simulator)
        if factory is None:
            message = '{} is not a supported simulation'.format(simulator)
            print(message)
            # Carry the reason in the exception as well, so callers that
            # catch or log the NameError can see which name was rejected.
            raise NameError(message)
        return factory()
    def AVI(self):
        """Run fitted value iteration, then evaluate on test trajectories.

        Training: for each of ``self.number_of_iterations`` iterations,
        start states are rolled out with ``execute_random_action`` until
        ``self.batch_size`` trajectories have reached the goal within the
        simulator's time limit. Every accepted trajectory updates the
        iteration's ``values`` table through ``init_values`` and
        ``compute_value_of_trajectory`` and contributes one training RMSE.
        After the batch, the mean training RMSE and the average / maximum
        Bellman errors are appended to ``self.training_rmse``,
        ``self.bellman_error_avg`` and ``self.bellman_error_max``;
        ``self.model`` is re-learned from the collected facts and examples
        and its trees are copied into ``self.trees_latest``.

        Testing: for the "logistics" and "blocks" simulators,
        ``self.test_trajectory_no`` trajectories are rolled out using
        ``self.test_explore`` and their mean RMSE is appended to
        ``self.testing_rmse``. No test rollout is defined for the other
        simulators, so the test phase is skipped for them.

        Side effects: appends a rollout log to
        ``<resultpath><simulator>_FVI_out.txt``, advances
        ``self.state_number`` and prints progress messages.

        Raises:
            NameError: if ``self.simulator`` is not a supported simulation.
        """
        simulator_classes = {
            "logistics": Logistics,
            "pong": Pong,
            "tetris": Tetris,
            "wumpus": Wumpus,
            "blocks": Blocks_world,
            "blackjack": Game,
            "50chain": Chain,
            "net_admin": Admin,
        }
        # Budget for one training rollout, compared against clock() deltas.
        train_time_limits = {
            "logistics": 0.5,
            "pong": 1000,
            "tetris": 10,
            "wumpus": 1,
            "blocks": 1,
            "blackjack": 1,
            "50chain": 1,
            "net_admin": 1,
        }
        # Only these domains have a test rollout defined so far.
        test_time_limits = {"logistics": 1, "blocks": 1}

        if self.simulator not in simulator_classes:
            message = '{} is not a supported simulation'.format(
                self.simulator)
            print(message)
            raise NameError(message)
        simulator_cls = simulator_classes[self.simulator]
        train_time_limit = train_time_limits[self.simulator]
        log_path = self.resultpath + self.simulator + "_FVI_out.txt"

        # Defined up front so the test phase still has a table to work with
        # when number_of_iterations is 0.
        values = {}
        for i in range(self.number_of_iterations):
            training_rmses_per_traj = []
            trajectories = []
            facts, bk = [], []
            values = {}
            accepted = 0
            print("The exploration policy is {}".format(self.explore))
            while accepted < self.batch_size:
                state = simulator_cls(number=self.state_number, start=True)
                if not bk:
                    bk = simulator_cls.bk
                with open(log_path, "a") as fp:
                    fp.write("*" * 80 + "\nstart state: " +
                             str(state.get_state_facts()) + "\n")
                    within_time = True
                    start = clock()
                    trajectory = []
                    while not state.goal():
                        fp.write("=" * 80 + "\n")
                        s_number = state.state_number
                        s_facts = state.get_state_facts()
                        state_action_pair = state.execute_random_action(
                            actn_dist=(1 - self.explore))
                        state = state_action_pair[0]
                        # action, with its trailing period removed
                        action = state_action_pair[1][0][:-1]
                        fp.write(str(state.get_state_facts()) + "\n")
                        trajectory.append((s_number, s_facts + [action]))
                        if abs(clock() - start) > train_time_limit:
                            within_time = False
                            break
                    # Timed-out rollouts are still kept for the Bellman
                    # error computation below, but they do not count
                    # toward the batch.
                    trajectories.append(trajectory)
                    if within_time:
                        self.init_values(values, trajectory)
                        self.compute_value_of_trajectory(values,
                                                         trajectory,
                                                         AVI=True)
                        self.state_number += 1
                        # NOTE(review): this walks the whole values table
                        # after every accepted trajectory, so facts of
                        # earlier trajectories are appended again.
                        for key in values:
                            for state_key in values[key]:
                                facts += list(state_key[1])
                        print("The Value iteration no is {}".format(i))
                        rmse_train = self.compute_train_error(
                            values, trajectory)
                        training_rmses_per_traj.append(rmse_train)
                        accepted += 1

            # NOTE: decaying self.explore between iterations is currently
            # disabled.
            self.training_rmse.append(
                sum(training_rmses_per_traj) /
                float(len(training_rmses_per_traj)))
            self.bellman_error_avg.append(
                self.compute_bellman_error(trajectories, aggregate='avg'))
            self.bellman_error_max.append(
                self.compute_bellman_error(trajectories, aggregate='max'))

            # One example line per target: the target followed by all of
            # its state values, space separated.
            examples = []
            for key in values:  # TODO fix this
                if values[key]:
                    target_values = [str(values[key][state_key])
                                     for state_key in values[key]]
                    examples.append(" ".join([key] + target_values))
            targets = self.get_targets(examples)
            self.model.setTargets(targets)
            self.model.learn(facts, examples, bk)

            for target in self.actions_all:
                if target in self.model.trees:
                    continue
                # No trees were learned for this action in this iteration:
                # carry the trees of the previous iteration forward.
                print("No tree for the target found")
                self.model.trees[target] = deepcopy(
                    self.trees_latest[target])
                self.model.addTarget(target)

            self.trees_latest = deepcopy(self.model.trees)

        # Test trajectory generation and value function inference.
        test_time_limit = test_time_limits.get(self.simulator)
        if test_time_limit is None:
            return
        testing_rmse_per_traj = []
        completed = 0
        while completed < self.test_trajectory_no:
            state = simulator_cls(number=self.state_number, start=True)
            within_time = True
            start = clock()
            trajectory = []
            test_trajectory = []
            while not state.goal():
                s_number = state.state_number
                s_facts = state.get_state_facts()
                prev_state = deepcopy(state)
                state_action_pair = state.execute_random_action(
                    actn_dist=(1 - self.test_explore))
                state = state_action_pair[0]
                action = state_action_pair[1][0][:-1]
                test_trajectory.append((prev_state, action))
                trajectory.append((s_number, s_facts + [action]))
                if abs(clock() - start) > test_time_limit:
                    within_time = False
                    break
            if not within_time:
                # A timed-out rollout does not count; sample another one.
                continue
            self.init_values(values, trajectory)
            self.test_trajectories_output.append(
                self.get_trajectory_mismatch(test_trajectory))
            self.compute_value_of_test_trajectory(values,
                                                  trajectory,
                                                  AVI=True)
            rmse_test = self.compute_train_error(values, trajectory)
            testing_rmse_per_traj.append(rmse_test)
            self.state_number += 1
            completed += 1
        # Guard against an empty list (test_trajectory_no <= 0) instead of
        # dividing by zero.
        if testing_rmse_per_traj:
            self.testing_rmse.append(
                sum(testing_rmse_per_traj) /
                float(len(testing_rmse_per_traj)))
    def compute_transfer_model(self):
        """Learn the initial (burn-in / transfer) model, then run AVI.

        Creates the directory ``<resultpath>//Run<current_run>`` if it does
        not exist and points ``self.resultpath`` at it, so that the results
        of each run are stored separately.

        Then ``self.burn_in_no_of_traj`` trajectories that reach the goal
        within the simulator's time limit are collected. Their values are
        computed with ``init_values`` / ``compute_value_of_trajectory`` and
        a ``GradientBoosting`` regressor is learned from the resulting
        facts and examples; it becomes ``self.model`` and its trees are
        copied into ``self.trees_latest``. Finally ``self.explore`` is set
        to 0.9 and ``AVI`` is run once, and a second time when
        ``self.transfer`` is set.

        When ``self.transfer`` is set, the visited states are also appended
        to ``<simulator>_transfer_out.txt`` in the working directory.

        NOTE(review): the original notes say that start-state parameters
        (e.g. a different grid size for the wumpus world) can be passed at
        simulator construction if the constructor allows it; nothing in
        this method passes such parameters -- confirm before relying on it.

        Raises:
            NameError: if ``self.simulator`` is not a supported simulation.
        """
        simulator_classes = {
            "logistics": Logistics,
            "pong": Pong,
            "tetris": Tetris,
            "wumpus": Wumpus,
            "blocks": Blocks_world,
            "blackjack": Game,
            "50chain": Chain,
            "net_admin": Admin,
        }
        # Budget for one burn-in rollout, compared against clock() deltas.
        time_limits = {
            "logistics": 0.5,
            "pong": 1000,
            "tetris": 1000,
            "wumpus": 1,
            "blocks": 1,
            "blackjack": 1,
            "50chain": 1,
            "net_admin": 1,
        }
        # Fail before touching the file system if the domain is unknown.
        if self.simulator not in simulator_classes:
            message = '{} is not a supported simulation'.format(
                self.simulator)
            print(message)
            raise NameError(message)
        simulator_cls = simulator_classes[self.simulator]
        time_limit = time_limits[self.simulator]

        dirName = self.resultpath + "//Run" + str(self.current_run)
        if not os.path.exists(dirName):
            os.makedirs(dirName)
            print("Directory ", dirName, " Created ")
        else:
            print("Directory ", dirName, " already exists")

        self.resultpath = dirName + "//"

        facts, examples, bk = [], [], []
        values = {}
        collected = 0
        # Only rollouts that finish within the time limit count as burn-in
        # trajectories.
        while collected < self.burn_in_no_of_traj:
            state = simulator_cls(number=self.state_number, start=True)
            if not bk:
                bk = simulator_cls.bk
            with open(self.simulator + "_transfer_out.txt", "a") as f:
                if self.transfer:
                    f.write("start state: " + str(state.get_state_facts()) +
                            "\n")
                within_time = True
                start = clock()
                trajectory = []
                while not state.goal():
                    if self.transfer:
                        f.write("=" * 80 + "\n")
                    s_number = state.state_number
                    s_facts = state.get_state_facts()
                    state_action_pair = state.execute_random_action(
                        actn_dist=(1 - self.explore))
                    state = state_action_pair[0]
                    # action, with its trailing period removed
                    action = state_action_pair[1][0][:-1]
                    if self.transfer:
                        f.write(str(state.get_state_facts()) + "\n")
                    trajectory.append((s_number, s_facts + [action]))
                    if abs(clock() - start) > time_limit:
                        within_time = False
                        break
                if within_time:
                    print("The trajectory is {}".format(trajectory))
                    # NOTE(review): this blocks until Enter is pressed after
                    # every accepted trajectory; it looks like a debugging
                    # pause -- confirm whether it should stay.
                    raw_input()
                    self.init_values(values, trajectory)
                    self.compute_value_of_trajectory(values, trajectory)
                    self.state_number += len(trajectory) + 1
                    for target in values:
                        for state_key in values[target]:
                            facts += tuple(state_key[1])
                            examples.append(
                                target + " " +
                                str(values[target][state_key]))
                    collected += 1
        targets = self.get_targets(examples)
        reg = GradientBoosting(regression=True,
                               treeDepth=self.treeDepth,
                               trees=self.trees,
                               loss=self.loss)
        reg.setTargets(targets)
        reg.learn(facts, examples, bk)
        self.model = reg
        self.trees_latest = deepcopy(self.model.trees)
        self.explore = 0.9
        self.AVI()
        if self.transfer:
            self.AVI()