def part_iii_evaluation(sim_filename):
    """Learn an MDP model with ADP-RL against the simulator in ``sim_filename``,
    then evaluate the greedy policy; repeats for 10 rounds of 100 learning
    episodes plus 100 evaluation runs each.

    Prints per-round value function, policy, and average evaluation reward,
    then the average reward of every round.
    """
    # BUG FIX: Python 2 `print` statements replaced with the function form,
    # which behaves identically for a single argument and is valid Python 3.
    print(sim_filename)
    mdp = MDP("blank_2_actions_81_states_mdp.txt")
    results = []
    # Dirichlet-style prior: pseudo-count of 0.1 (not 1, as the old comment
    # claimed) for every (action, state, next_state) transition.
    transition_count = [[[0.1 for _ in range(81)] for _ in range(81)]
                        for _ in range(2)]
    for n in range(10):
        print("Big loop " + str(n))
        results.append([])
        # Learning phase: 100 ADP-RL episodes, each on a fresh simulator.
        for _ in range(100):
            mdp, transition_count = adp_rl(mdp, Sim(MDP(sim_filename)),
                                           transition_count)
        # Plan on the learned model with a tighter discount/tolerance than
        # the per-step planning inside adp_rl.
        value_fn, policy, iterations = plan(mdp, 0.99, 0.01)
        print("Value: " + str(value_fn))
        print("Policy: " + str(policy))
        # Evaluation phase: run the learned policy on 100 fresh simulators.
        for _ in range(100):
            reward = run_policy(Sim(MDP(sim_filename)), policy)
            results[n].append(reward)
        print("Average reward of policy: " + str(average(results[n])))
    for round_rewards in results:
        print(average(round_rewards))
def adp_rl(mdp, sim, transition_count, p_explore=0.1, discount=0.95,
           plan_tolerance=0.05, horizon=200):
    """Run one adaptive-dynamic-programming RL episode on ``sim``.

    At every step the current model is re-planned, an action is chosen
    epsilon-greedily, and the observed transition is folded back into the
    model (reward table, transition pseudo-counts, transition probabilities).

    Args:
        mdp: the learned model; its rewards/transitions are updated in place
            and via `update_transitions`.
        sim: simulator exposing `time`, `current`, `get_actions()`,
            and `do_action(action)`.
        transition_count: pseudo-counts indexed [action][state][next_state].
        p_explore: probability of taking a uniformly random action
            (generalized from the hard-coded 0.1; default preserves behavior).
        discount, plan_tolerance: parameters forwarded to `plan`
            (defaults preserve the original 0.95 / 0.05).
        horizon: episode length in simulator time (default 200, as before).

    Returns:
        (mdp, transition_count) after the episode.
    """
    while sim.time < horizon:
        old_state = sim.current
        val, policy, iterations = plan(mdp, discount, plan_tolerance)
        # Epsilon-greedy exploration/exploitation.
        if random.random() < p_explore:
            action = random.choice(range(sim.get_actions()))
        else:
            action = policy[old_state]
        reward = sim.do_action(action)
        new_state = sim.current
        # Model update: record the reward observed at the landing state,
        # bump the pseudo-count for the observed transition, then refresh
        # the MDP's transition probabilities from the counts.
        mdp.rewards[new_state] = reward
        transition_count[action][old_state][new_state] += 1
        mdp = update_transitions(mdp, transition_count)
    return mdp, transition_count
def adp_rl(mdp, sim, transition_count):
    """One episode of model-based RL: alternate planning on the learned
    model with epsilon-greedy acting, updating the reward table and the
    transition pseudo-counts after every step until the simulator clock
    reaches 200. Returns the updated (mdp, transition_count)."""
    epsilon = 0.1
    while sim.time < 200:
        state = sim.current
        _, policy, _ = plan(mdp, 0.95, 0.05)
        # Explore with probability epsilon, otherwise act greedily on the
        # freshly planned policy.
        exploring = random.random() < epsilon
        if exploring:
            action = random.choice(range(sim.get_actions()))
        else:
            action = policy[state]
        observed_reward = sim.do_action(action)
        successor = sim.current
        # Fold the observation back into the model, then rebuild the
        # transition probabilities from the counts.
        mdp.rewards[successor] = observed_reward
        transition_count[action][state][successor] += 1
        mdp = update_transitions(mdp, transition_count)
    return mdp, transition_count
def part_iii_evaluation(sim_filename):
    """Evaluate part III: 10 rounds, each of 100 ADP-RL learning episodes
    followed by planning and 100 evaluation runs of the greedy policy.

    Prints diagnostics per round and the per-round average rewards at the end.
    """
    # BUG FIX: converted Python 2 `print` statements to the function form
    # (identical output for a single argument, and valid under Python 3).
    print(sim_filename)
    mdp = MDP("blank_2_actions_81_states_mdp.txt")
    results = []
    # Prior over transitions: a pseudo-count of 0.1 per
    # (action, state, next_state) triple — corrected comment; the old one
    # said "seen once" while the value is 0.1.
    transition_count = [[[0.1 for _ in range(81)] for _ in range(81)]
                        for _ in range(2)]
    for n in range(10):
        print("Big loop " + str(n))
        results.append([])
        # Learning: each episode uses a fresh simulator over the same file.
        for _ in range(100):
            mdp, transition_count = adp_rl(mdp, Sim(MDP(sim_filename)),
                                           transition_count)
        value_fn, policy, iterations = plan(mdp, 0.99, 0.01)
        print("Value: " + str(value_fn))
        print("Policy: " + str(policy))
        # Evaluation: score the learned policy on 100 independent runs.
        for _ in range(100):
            reward = run_policy(Sim(MDP(sim_filename)), policy)
            results[n].append(reward)
        print("Average reward of policy: " + str(average(results[n])))
    for round_rewards in results:
        print(average(round_rewards))
def updatePlanQueue(self, TMR):
    """Process one timestep of the plan queue.

    If a TMR arrived, select a plan for it, push it (and any prerequisite
    plans, at a higher priority) onto the heap, and execute one timestep of
    the highest-priority plan; otherwise just continue the current plan.
    Pops the front plan if it reports itself finished.
    """
    if TMR:
        new_plan = self.planSelect(TMR)
        # BUG FIX (idiom): compare to None with `is`, not `==`.
        if new_plan is None:
            return
        heappush(self.plans, new_plan)
        # Iterating an empty prerequisite list is a no-op, so the explicit
        # length guard was redundant.
        for prereq in new_plan.prerequisites:
            if prereq[0] == "knowledge":
                # Resolve the knowledge prerequisite in place.
                prereq[2] = self.kblookup(prereq[1])
            if prereq[0] == "plan":
                # Queue the prerequisite plan ahead of the new plan
                # (lower priority number = runs first).
                pt = PlanList.plan_map[prereq[1]]
                prereq_plan = plan(new_plan.priority - 1, pt[1], 0, True,
                                   prereq[1])
                heappush(self.plans, prereq_plan)
        self.plans[0].executeOneTimestep(TMR)
    elif len(self.plans) > 0:
        # No new TMR: continue the current highest-priority plan.
        self.plans[0].executeOneTimestep(TMR)
    else:
        # Nothing queued and nothing new to do.
        return
    if self.plans[0].finished:
        heappop(self.plans)
def updatePlanQueue(self, TMR):
    """Process one timestep of the plan queue.

    With no new TMR, continue the current plan (if any). With a new TMR,
    select a plan, push it plus any prerequisite plans onto the heap, and
    execute one timestep of the highest-priority plan. Pops the front plan
    once it reports finished.
    """
    # NOTE(review): `TMR == 0` does not match None or an empty TMR object —
    # presumably callers pass 0 for "no TMR"; confirm before changing.
    # Nothing to do in this case
    if TMR == 0 and len(self.plans) == 0:
        return
    # Continue with current plan; guaranteed to exist since the previous
    # condition wasn't met.
    elif TMR == 0:
        self.plans[0].executeOneTimestep(TMR)
    # New TMR: select and enqueue a plan (and its prerequisites).
    else:
        new_plan = self.planSelect(TMR)
        # BUG FIX (idiom): compare to None with `is`, not `==`.
        if new_plan is None:
            return
        heappush(self.plans, new_plan)
        # Iterating an empty list is a no-op, so no length guard needed.
        for prereq in new_plan.prerequisites:
            if prereq[0] == "knowledge":
                # Resolve the knowledge prerequisite in place.
                prereq[2] = self.kblookup(prereq[1])
            if prereq[0] == "plan":
                # Queue the prerequisite plan ahead of the new plan
                # (lower priority number = runs first).
                pt = PlanList.plan_map[prereq[1]]
                prereq_plan = plan(new_plan.priority - 1, pt[1], 0, True,
                                   prereq[1])
                heappush(self.plans, prereq_plan)
        self.plans[0].executeOneTimestep(TMR)
    if self.plans[0].finished:
        heappop(self.plans)
def main() -> None:
    """Parse CLI arguments, optionally stage files from the dump directory,
    then (after confirmation) load the planned files to the server."""
    mode_help = "Mode help."

    # Get the description from the docstring of the function
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter, usage=USAGE)

    # Set values of default arguments
    parser.set_defaults(dump="",
                        staging=str(data / "staging"),
                        server=str(data / "server"),
                        ignore=['.jsonl', '.json', '.aae'],
                        replace=False,
                        mode="copy")
    parser.add_argument('-d', '--dump', type=str, required=False,
                        help=mode_help)
    parser.add_argument('--staging', type=str, required=False, help=mode_help)
    parser.add_argument('--server', type=str, required=False, help=mode_help)
    # BUG FIX: `type=list` split the argument into single characters
    # (e.g. "-i .json" became ['.', 'j', ...]); accept repeated values.
    parser.add_argument('-i', '--ignore', nargs='+', type=str, required=False,
                        help=mode_help)
    # BUG FIX: `type=bool` treats any non-empty string (even "False") as
    # True; parse common truthy spellings explicitly so "-r False" works.
    parser.add_argument('-r', '--replace', required=False,
                        type=lambda s: s.strip().lower() in ("1", "true",
                                                             "yes", "y"),
                        help=mode_help)
    parser.add_argument('-m', '--mode', type=str, required=False,
                        help=mode_help)

    # Parse parameters
    cli_args = parser.parse_args()

    # Instantiate default args object
    args = Arguments(cli_args.dump,
                     staging_paths(cli_args.staging),
                     server_paths(cli_args.server),
                     cli_args.ignore,
                     cli_args.replace,
                     cli_args.mode)
    print(args)

    # Get user input
    stage_opts = ["y", "n"]
    stage_question = (
        f"\nDo you want to stage files? {'/'.join(stage_opts)}: ")
    stage_answer = cli_ask_question(question=stage_question,
                                    options=stage_opts)
    if stage_answer == "y":
        validate_staging(args)
        # Prepare migration from dump
        plan_staging = plan(source=args.dump, destinations=args.staging,
                            ignore=args.ignore)
        # Execute migration
        execute(df=plan_staging, mode=args.mode, replace=args.replace)
        # Prepare migration to server from staging
        plan_server = plan(source=args.staging["HOME"],
                           destinations=args.server, ignore=args.ignore)
    else:
        # Prepare direct migration to server
        plan_server = plan(source=args.dump, destinations=args.server,
                           ignore=args.ignore)

    # Confirm load job.  BUG FIX: the question string had been broken
    # across a literal newline inside the f-string; reconstructed.
    load_options = ["y", "n"]
    load_question = (
        f"\nReady to load data to the server? {'/'.join(load_options)}: ")
    load_answer = cli_ask_question(question=load_question,
                                   options=load_options)
    if load_answer == "y":
        # Execute migration
        execute(df=plan_server, mode=args.mode, replace=args.replace)
    else:
        # BUG FIX: typo "Abortng" -> "Aborting"; plain string since the
        # f-string had no placeholders.
        print("\nAll files are ready to load in staging. Aborting.")
        return
    if stage_answer == "y":
        # Cleanup
        optionally_clean_dir(args.staging["HOME"])
#List of all rides rides.append((coordenates, earliest, latest)) return params, rides if __name__ == "__main__": """ uso: python3 main.py nombre """ filename = sys.argv[1] data, ridesRaw = get_info('input/{}.in'.format(filename)) rows, columns, nCars, nRides, bonus, TIME = data cars = [Car() for _ in range(nCars)] rides = [Ride(elem) for elem in ridesRaw] maxDistance = rows + columns results = plan(rides, cars, bonus, TIME, maxDistance) with open("{}.out".format(filename), "w") as f: for l in results: l = list(map(str, l)) line = str(len(l)) + ' ' + ' '.join(l) + '\n' f.write(line)