def getSimulator(self, simulator):
    """Return a freshly started simulator instance for the given domain name."""
    simulators = {"logistics": Logistics,
                  "pong": Pong,
                  "tetris": Tetris,
                  "wumpus": Wumpus,
                  "blocks": Blocks_world,
                  "blackjack": Game,
                  "50chain": Chain,
                  "net_admin": Admin}
    if simulator not in simulators:
        raise NameError("{} is not a supported simulation".format(simulator))
    return simulators[simulator](start=True)
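# A minimal usage sketch for the factory above (hypothetical driver object;
# the enclosing class and its construction are defined elsewhere):
#
#     sim = agent.getSimulator("blocks")    # fresh, started Blocks_world
#     print(sim.get_state_facts())
#
# Unknown names raise NameError with the offending simulator name in the message.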
def start(self):
    facts, examples, bk = [], [], []
    i = 0
    values = {}
    while i < self.transfer * 5 + 3:
        print(i)
        if self.simulator == "blocks":
            state = Blocks_world(number=self.state_number, start=True)
            if not bk:
                bk = Blocks_world.bk
        elif self.simulator == "net_admin":
            state = Admin(number=self.state_number, start=True)
            if not bk:
                bk = Admin.bk
        with open(self.simulator + "_transfer_out.txt", "a") as f:
            if self.transfer:
                f.write("start state: " + str(state.get_state_facts()) + "\n")
            time_elapsed = 0
            within_time = True
            start = clock()
            trajectory = [(state.state_number, state.get_state_facts())]
            while not state.goal():
                if self.transfer:
                    f.write("=" * 80 + "\n")
                state_action_pair = state.execute_random_action()
                state = state_action_pair[0]
                if self.transfer:
                    f.write(str(state.get_state_facts()) + "\n")
                trajectory.append((state.state_number, state.get_state_facts()))
                end = clock()
                time_elapsed = abs(end - start)
                if self.simulator == "net_admin" and time_elapsed > 1:
                    within_time = False
                    break
        if within_time:
            self.compute_value_of_trajectory(values, trajectory)
            self.state_number += len(trajectory) + 1
            # every visited state becomes one regression example of the
            # form "value(s<N>) <value>"
            for key in values:
                facts += list(key[1])
                examples.append("value(s" + str(key[0]) + ") " + str(values[key]))
            i += 1
    reg = GradientBoosting(regression=True, treeDepth=3, trees=self.trees,
                           sampling_rate=0.1, loss=self.loss)
    reg.setTargets(["value"])
    reg.learn(facts, examples, bk)
    self.model = reg
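# start() turns each visited state into a regression example of the form
# "value(s<N>) <value>" for the "value" target. Below is a minimal illustrative
# sketch of how a discounted value could be assigned to every state on a
# goal-reaching trajectory of (state_number, facts) pairs; the actual
# compute_value_of_trajectory used above is defined elsewhere and may differ:
def _example_trajectory_values(trajectory, gamma=0.99, goal_reward=1.0):
    """Discount the terminal goal reward back along the trajectory (sketch)."""
    values = {}
    last = len(trajectory) - 1
    for depth, (state_number, facts) in enumerate(trajectory):
        # states closer to the goal receive a larger discounted value
        values[(state_number, tuple(facts))] = goal_reward * gamma ** (last - depth)
    return values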
def AVI(self):
    for i in range(self.number_of_iterations):
        training_rmses_per_traj = []
        trajectories = []
        j = 0
        facts, examples, bk = [], [], []
        values = {}
        print("The exploration policy is", self.explore)
        while j < self.batch_size:
            # fresh start state for the configured domain
            simulators = {"logistics": Logistics, "pong": Pong,
                          "tetris": Tetris, "wumpus": Wumpus,
                          "blocks": Blocks_world, "blackjack": Game,
                          "50chain": Chain, "net_admin": Admin}
            domain = simulators[self.simulator]
            state = domain(number=self.state_number, start=True)
            if not bk:
                bk = domain.bk
            # per-domain wall-clock budget (seconds) for one trajectory
            time_limits = {"logistics": 0.5, "pong": 1000, "tetris": 10,
                           "wumpus": 1, "blocks": 1, "blackjack": 1,
                           "50chain": 1, "net_admin": 1}
            with open(self.resultpath + self.simulator + "_FVI_out.txt", "a") as fp:
                fp.write("*" * 80 + "\nstart state: " +
                         str(state.get_state_facts()) + "\n")
                time_elapsed = 0
                within_time = True
                start = clock()
                trajectory = []
                while not state.goal():
                    fp.write("=" * 80 + "\n")
                    s_number = state.state_number
                    s_facts = state.get_state_facts()
                    # epsilon-greedy step: exploit the learned policy with
                    # probability (1 - explore), otherwise act at random
                    state_action_pair = state.execute_random_action(
                        actn_dist=(1 - self.explore))
                    state = state_action_pair[0]
                    action = state_action_pair[1][0][:-1]  # strip trailing period
                    fp.write(str(state.get_state_facts()) + "\n")
                    trajectory.append((s_number, s_facts + [action]))
                    end = clock()
                    time_elapsed = abs(end - start)
                    if time_elapsed > time_limits[self.simulator]:
                        within_time = False
                        break
            trajectories.append(trajectory)
            if within_time:
                self.init_values(values, trajectory)
                # fitted value iteration backup over the sampled trajectory
                self.compute_value_of_trajectory(values, trajectory, AVI=True)
                self.state_number += 1
                for key in values:
                    if values[key]:
                        examples_string = key
                        for state_key in values[key]:
                            facts += list(state_key[1])
                            examples_string += " " + str(values[key][state_key])
                        examples.append(examples_string)
                print("The value iteration no is", i)
                rmse_train = self.compute_train_error(values, trajectory)
                training_rmses_per_traj.append(rmse_train)
                j += 1
        # optional decay of self.explore, currently disabled:
        # if i != 0 and i % 10 == 0:
        #     self.explore = self.explore / 1.5
        self.training_rmse.append(
            sum(training_rmses_per_traj) / float(len(training_rmses_per_traj)))
        bellman_error = self.compute_bellman_error(trajectories, aggregate='avg')
        self.bellman_error_avg.append(bellman_error)
        bellman_error = self.compute_bellman_error(trajectories, aggregate='max')
        self.bellman_error_max.append(bellman_error)
        # rebuild the regression examples from the final value table
        examples = []
        for key in values:
            if values[key]:
                examples_string = key
                for state_key in values[key]:
                    examples_string += " " + str(values[key][state_key])
                examples.append(examples_string)
        targets = self.get_targets(examples)
        self.model.setTargets(targets)
        self.model.learn(facts, examples, bk)
        # actions that were never sampled in this iteration keep the trees
        # learned in the previous iteration
        for target in self.actions_all:
            if target not in self.model.trees:
                try:
                    print("self.model.trees[target]",
                          self.model.trees[target], self.trees_latest[target])
                except KeyError:
                    print("No tree for the target found")
                self.model.trees[target] = deepcopy(self.trees_latest[target])
                self.model.addTarget(target)
        self.print_tree(self.model)
        input("Value Iteration " + str(i))
        self.trees_latest = deepcopy(self.model.trees)

        # test-trajectory generation and value-function inference
        t = 0
        testing_rmse_per_traj = []
        while t < self.test_trajectory_no:
            # test trajectories are currently generated only for the
            # logistics and blocks-world domains
            if self.simulator == "logistics":
                state = Logistics(number=self.state_number, start=True)
            elif self.simulator == "blocks":
                state = Blocks_world(number=self.state_number, start=True)
            time_elapsed = 0
            within_time = True
            start = clock()
            trajectory = []
            test_trajectory = []
            while not state.goal():
                s_number = state.state_number
                s_facts = state.get_state_facts()
                prev_state = deepcopy(state)
                state_action_pair = state.execute_random_action(
                    actn_dist=(1 - self.test_explore))
                state = state_action_pair[0]
                action = state_action_pair[1][0][:-1]  # strip trailing period
                test_trajectory.append((prev_state, action))
                trajectory.append((s_number, s_facts + [action]))
                end = clock()
                time_elapsed = abs(end - start)
                if time_elapsed > 1:
                    # do not count a timed-out trajectory; retry instead
                    within_time = False
                    t -= 1
                    break
            if within_time:
                self.init_values(values, trajectory)
                self.test_trajectories_output.append(
                    self.get_trajectory_mismatch(test_trajectory))
                self.compute_value_of_test_trajectory(values, trajectory,
                                                      AVI=True)
                rmse_test = self.compute_train_error(values, trajectory)
                testing_rmse_per_traj.append(rmse_test)
                self.state_number += 1
            t += 1
        self.testing_rmse.append(
            sum(testing_rmse_per_traj) / float(len(testing_rmse_per_traj)))
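# AVI() records an average and a max Bellman error per iteration via
# compute_bellman_error (defined elsewhere). Below is a minimal sketch of such
# a computation over the collected trajectories, assuming a value-lookup
# callable plus a fixed reward and discount (all illustrative assumptions):
def _example_bellman_error(trajectories, value_of, reward=0.0, gamma=0.99,
                           aggregate='avg'):
    """Aggregate the residuals |V(s) - (r + gamma * V(s'))| over all steps."""
    errors = []
    for trajectory in trajectories:
        # pair each state with its successor along the trajectory
        for (s, _), (s_next, _) in zip(trajectory, trajectory[1:]):
            errors.append(abs(value_of(s) - (reward + gamma * value_of(s_next))))
    if not errors:
        return 0.0
    return max(errors) if aggregate == 'max' else sum(errors) / len(errors)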
def compute_transfer_model(self):
    '''Compute the transfer model when transfer=1; in that case the model is
    fit over six burn-in trajectories (the count can be changed in the code).
    Otherwise at least one trajectory is used to compute the initial model
    before fitted value iteration starts. In the transfer start state,
    parameters such as a different grid size for the Wumpus world can be
    passed in the constructor call if the constructor supports them.
    '''
    # create a separate run directory under the destination folder so that
    # the results of each run are stored apart
    dirName = self.resultpath + "//Run" + str(self.current_run)
    if not os.path.exists(dirName):
        os.makedirs(dirName)
        print("Directory", dirName, "created")
    else:
        print("Directory", dirName, "already exists")
    self.resultpath = dirName + "//"
    facts, examples, bk = [], [], []
    i = 0
    values = {}
    while i < self.burn_in_no_of_traj:  # burn-in trajectories
        # fresh start state for the configured domain
        simulators = {"logistics": Logistics, "pong": Pong,
                      "tetris": Tetris, "wumpus": Wumpus,
                      "blocks": Blocks_world, "blackjack": Game,
                      "50chain": Chain, "net_admin": Admin}
        domain = simulators[self.simulator]
        state = domain(number=self.state_number, start=True)
        if not bk:
            bk = domain.bk
        # per-domain wall-clock budget (seconds) for one trajectory
        time_limits = {"logistics": 0.5, "pong": 1000, "tetris": 1000,
                       "wumpus": 1, "blocks": 3, "blackjack": 1,
                       "50chain": 1, "net_admin": 1}
        with open(self.simulator + "_transfer_out.txt", "a") as f:
            if self.transfer:
                f.write("start state: " + str(state.get_state_facts()) + "\n")
            time_elapsed = 0
            within_time = True
            start = clock()
            trajectory = []
            while not state.goal():
                if self.transfer:
                    f.write("=" * 80 + "\n")
                s_number = state.state_number
                s_facts = state.get_state_facts()
                state_action_pair = state.execute_random_action(
                    actn_dist=(1 - self.explore))
                state = state_action_pair[0]
                action = state_action_pair[1][0][:-1]  # strip trailing period
                if self.transfer:
                    f.write(str(state.get_state_facts()) + "\n")
                trajectory.append((s_number, s_facts + [action]))
                end = clock()
                time_elapsed = abs(end - start)
                if time_elapsed > time_limits[self.simulator]:
                    within_time = False
                    break
        if within_time:
            print("The trajectory is", trajectory)
            input()
            self.init_values(values, trajectory)
            self.compute_value_of_trajectory(values, trajectory)
            self.state_number += len(trajectory) + 1
            # one regression example per (action target, state) pair
            for target in values:
                for state_key in values[target]:
                    facts += tuple(state_key[1])
                    examples.append(target + " " + str(values[target][state_key]))
            i += 1
    targets = self.get_targets(examples)
    reg = GradientBoosting(regression=True, treeDepth=self.treeDepth,
                           trees=self.trees, loss=self.loss)
    reg.setTargets(targets)
    reg.learn(facts, examples, bk)
    self.model = reg
    self.trees_latest = deepcopy(self.model.trees)
    self.print_tree(self.model)
    input("BURN IN FINISHED")
    self.explore = 1
    self.AVI()
    if self.transfer:
        self.AVI()
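# A hypothetical end-to-end driver (illustrative only; the real class name,
# constructor signature, and attribute defaults live elsewhere in this module):
#
#     agent = Agent(simulator="blocks", trees=10, treeDepth=3, loss="LS",
#                   transfer=0, batch_size=10, number_of_iterations=100)
#     agent.compute_transfer_model()   # burn-in, then fitted value iteration
#     print(agent.training_rmse, agent.testing_rmse)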