Example no. 1
def part_iii_evaluation(sim_filename):
    print(sim_filename)
    mdp = MDP("blank_2_actions_81_states_mdp.txt")
    results = []
    # Prior: a pseudo-count of 0.1 for every transition, so unseen
    # transitions keep a small nonzero probability.
    transition_count = [[[0.1 for _ in range(81)] for _ in range(81)] for _ in range(2)]

    for n in range(10):
        print("Big loop " + str(n))
        results.append([])
        for i in range(100):
            mdp, transition_count = adp_rl(mdp, Sim(MDP(sim_filename)), transition_count)
        value_fn, policy, iterations = plan(mdp, 0.99, 0.01)
        print("Value: " + str(value_fn))
        print("Policy: " + str(policy))
        # print("Reward: " + str(mdp.rewards))
        # print("Transitions: " + str(mdp.transitions))
        for i in range(100):
            reward = run_policy(Sim(MDP(sim_filename)), policy)
            results[n].append(reward)

        print("Average reward of policy: " + str(average(results[n])))

    for l in results:
        print(average(l))
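
run_policy and average are helpers that are not shown here. A minimal sketch, assuming the simulator exposes the same time/current/do_action interface used by adp_rl in the next example and that an episode runs for 200 steps:

def average(values):
    # Arithmetic mean of a list of episode rewards.
    return sum(values) / len(values)


def run_policy(sim, policy):
    # Follow a fixed policy and accumulate reward for one episode
    # (assumed to be 200 steps, matching the adp_rl horizon below).
    total_reward = 0
    while sim.time < 200:
        total_reward += sim.do_action(policy[sim.current])
    return total_reward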
Example no. 2
def adp_rl(mdp, sim, transition_count):
    p_explore = 0.1
    while sim.time < 200:
        old_state = sim.current
        val, policy, iterations = plan(mdp, 0.95, 0.05)
        # Epsilon-greedy action selection: explore with probability
        # p_explore, otherwise follow the current greedy policy.
        if random.random() < p_explore:
            # Choose a random action.
            action = random.randrange(sim.get_actions())
            # print("Random action: " + str(action))
        else:
            # Choose the policy's action.
            action = policy[old_state]

        reward = sim.do_action(action)
        new_state = sim.current
        # Update the model: record the observed reward for the new state,
        # bump the count for the observed transition, and renormalize the
        # transition probabilities from the counts.
        mdp.rewards[new_state] = reward
        transition_count[action][old_state][new_state] += 1
        mdp = update_transitions(mdp, transition_count)

    return mdp, transition_count
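
update_transitions is referenced but not defined in these examples. A minimal sketch of the renormalization step, assuming mdp.transitions is indexed as [action][from_state][to_state] just like transition_count:

def update_transitions(mdp, transition_count):
    # Smoothed estimate from the pseudo-counts (including the 0.1 prior):
    # normalize each (action, from_state) row so it sums to 1.
    for a, per_action in enumerate(transition_count):
        for s, counts in enumerate(per_action):
            total = sum(counts)
            mdp.transitions[a][s] = [c / total for c in counts]
    return mdp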
Example no. 3
def updatePlanQueue(self, TMR):
    if TMR:
        # A new TMR arrived: select a plan for it and queue it by priority.
        new_plan = self.planSelect(TMR)
        if new_plan is None:
            return
        heappush(self.plans, new_plan)
        if len(new_plan.prerequisites) > 0:
            for prereq in new_plan.prerequisites:
                if prereq[0] == "knowledge":
                    prereq[2] = self.kblookup(prereq[1])
                if prereq[0] == "plan":
                    # Queue the prerequisite plan at a higher priority
                    # (lower number) so it runs before the new plan.
                    pt = PlanList.plan_map[prereq[1]]
                    prereq_plan = plan(new_plan.priority - 1, pt[1], 0, True, prereq[1])
                    heappush(self.plans, prereq_plan)
        self.plans[0].executeOneTimestep(TMR)
    elif len(self.plans) > 0:
        # No new TMR: continue executing the highest-priority plan.
        self.plans[0].executeOneTimestep(TMR)
    else:
        return
    if self.plans[0].finished:
        heappop(self.plans)
Example no. 4
def updatePlanQueue(self, TMR):
    # Nothing to do: no new TMR and no queued plans.
    if TMR == 0 and len(self.plans) == 0:
        return
    # Continue with the current plan; one is guaranteed to exist because
    # the previous condition wasn't met.
    elif TMR == 0:
        self.plans[0].executeOneTimestep(TMR)
    # New TMR: select a plan for it and queue it alongside any existing plans.
    else:
        new_plan = self.planSelect(TMR)
        if new_plan is None:
            return
        heappush(self.plans, new_plan)
        if len(new_plan.prerequisites) > 0:
            for prereq in new_plan.prerequisites:
                if prereq[0] == "knowledge":
                    prereq[2] = self.kblookup(prereq[1])
                if prereq[0] == "plan":
                    pt = PlanList.plan_map[prereq[1]]
                    prereq_plan = plan(new_plan.priority - 1, pt[1], 0, True, prereq[1])
                    heappush(self.plans, prereq_plan)
        self.plans[0].executeOneTimestep(TMR)
    if self.plans[0].finished:
        heappop(self.plans)
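
Both versions push plan objects onto a heapq, which requires plans to be comparable. A minimal sketch of the plan class these examples assume; every field except priority, prerequisites, and finished is a guess from the constructor call plan(priority, steps, 0, True, name) above:

class plan:
    def __init__(self, priority, steps, step_index, is_prereq, name):
        self.priority = priority        # lower number pops first
        self.steps = steps
        self.step_index = step_index
        self.is_prereq = is_prereq
        self.name = name
        self.prerequisites = []
        self.finished = False

    def __lt__(self, other):
        # heapq is a min-heap, so the plan with the lowest priority
        # number sits at plans[0] and executes first.
        return self.priority < other.priority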
Example no. 5
def main() -> None:

    mode_help = "Mode help."

    # Build the parser; USAGE supplies the usage text shown in --help
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter, usage=USAGE)

    # Set values of default arguments
    parser.set_defaults(dump="",
                        staging=str(data / "staging"),
                        server=str(data / "server"),
                        ignore=['.jsonl', '.json', '.aae'],
                        replace=False,
                        mode="copy")

    parser.add_argument('-d',
                        '--dump',
                        type=str,
                        required=False,
                        help=mode_help)
    parser.add_argument('--staging', type=str, required=False, help=mode_help)
    parser.add_argument('--server', type=str, required=False, help=mode_help)
    # nargs='+' collects a list of extensions; argparse's type=list would
    # split a single argument string into individual characters.
    parser.add_argument('-i',
                        '--ignore',
                        nargs='+',
                        required=False,
                        help=mode_help)
    # store_true avoids the type=bool pitfall, where any non-empty
    # string (including "False") parses as True.
    parser.add_argument('-r',
                        '--replace',
                        action='store_true',
                        required=False,
                        help=mode_help)
    parser.add_argument('-m',
                        '--mode',
                        type=str,
                        required=False,
                        help=mode_help)

    # Parse parameters
    cli_args = parser.parse_args()

    # Bundle the parsed arguments into an Arguments object
    args = Arguments(cli_args.dump, staging_paths(cli_args.staging),
                     server_paths(cli_args.server), cli_args.ignore,
                     cli_args.replace, cli_args.mode)
    print(args)

    # Get user input
    stage_opts = ["y", "n"]
    stage_question = (
        f"\nDo you want to stage files? {'/'.join(stage_opts)}: ")
    stage_answer = cli_ask_question(question=stage_question,
                                    options=stage_opts)

    if stage_answer == "y":

        validate_staging(args)

        # Prepare migration from dump
        plan_staging = plan(source=args.dump,
                            destinations=args.staging,
                            ignore=args.ignore)

        # Execute migration
        execute(df=plan_staging, mode=args.mode, replace=args.replace)

        # Prepare migration to server from staging
        plan_server = plan(source=args.staging["HOME"],
                           destinations=args.server,
                           ignore=args.ignore)

    else:
        # Prepare direct migration to server
        plan_server = plan(source=args.dump,
                           destinations=args.server,
                           ignore=args.ignore)

    # Confirm load job
    load_options = ["y", "n"]
    load_question = (
        f"\nReady to load data to the server? {'/'.join(load_options)}: ")
    load_answer = cli_ask_question(question=load_question,
                                   options=load_options)

    if load_answer == "y":
        # Execute migration
        execute(df=plan_server, mode=args.mode, replace=args.replace)
    else:
        print(f"\nAll files are ready to load in staging. Abortng.")
        return

    if stage_answer == "y":
        # Cleanup
        optionally_clean_dir(args.staging["HOME"])
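
cli_ask_question is not shown in this example. A minimal sketch, assuming it simply re-prompts until the user enters one of the accepted options:

def cli_ask_question(question, options):
    # Keep prompting until the answer matches one of the options.
    answer = input(question).strip().lower()
    while answer not in options:
        answer = input(question).strip().lower()
    return answer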
Example no. 6
            # List of all rides
            rides.append((coordenates, earliest, latest))

        return params, rides


if __name__ == "__main__":
    """
    uso:

    python3 main.py nombre
    """
    filename = sys.argv[1]

    data, ridesRaw = get_info('input/{}.in'.format(filename))
    rows, columns, nCars, nRides, bonus, TIME = data

    cars = [Car() for _ in range(nCars)]
    rides = [Ride(elem) for elem in ridesRaw]
    maxDistance = rows + columns

    results = plan(rides, cars, bonus, TIME, maxDistance)

    with open("{}.out".format(filename), "w") as f:

        for l in results:
            l = list(map(str, l))
            line = str(len(l)) + ' ' + ' '.join(l) + '\n'
            f.write(line)
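
For reference, the output loop implies that plan returns one list of ride indices per car: each written line starts with the number of rides, followed by the indices themselves. A hypothetical illustration:

# Hypothetical result for two cars: car 0 serves rides 0 and 2, car 1 serves ride 1.
results = [[0, 2], [1]]
# The loop above would write:
#   2 0 2
#   1 1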