def train(filename, seed, alpha2): here = path.dirname(__file__) map_fn = path.join(here, "craft/options.map") rng1 = Random(seed + 1) env1 = Craft(map_fn, rng1, 7, 7, objects=OBJECTS4, problem_mood=problem_mood) init1 = create_init([env1.get_all_item()[2]], [[7, 7]]) options = create_options(env1.get_all_item(), [7, 7]) tasks = [[OBJECTS4["target"]]] not_task = [OBJECTS4["key"]] tasks = tasks[START_TASK:END_TASK + 1] with open(filename, "w") as csvfile: print("ql: begin experiment") for j, goal in enumerate(tasks): print("ql: begin task {}".format(j + START_TASK)) try: start = time() report1 = SequenceReport(csvfile, LOG_STEP, init1, EPISODE_LENGTH, TRIALS) reward1 = ReachFacts(env1, goal, not_task, problem_mood) policy1 = Empathic(alpha=1.0, gamma=1.0, epsilon=0.2, default_q=DEFAULT_Q, num_actions=4, rng=rng1, others_q=[], others_init=[[7, 7], [7, 7], [7, 7], [7, 7], [7, 7]], others_dist=[0.2, 0.2, 0.2, 0.2, 0.2], penalty=-2 * EPISODE_LENGTH, others_alpha=[alpha2], objects=OBJECTS4, problem_mood=problem_mood, options=options) agent1 = Agent(env1, policy1, reward1, rng1) agent1.train(steps=TOTAL_STEPS2, steps_per_episode=EPISODE_LENGTH, report=report1) evaluate_agent(env1, policy1, reward1, init1) except KeyboardInterrupt: end = time() logging.warning("ql: interrupted task %s after %s seconds!", j + START_TASK, end - start)
def train(filename, seed, emp_func): here = path.dirname(__file__) map_fn = path.join(here, "craft/doll.map") rng1 = Random(seed + 1) env1 = Craft(map_fn, rng1, 1, 5, objects=OBJECTS2, problem_mood=problem_mood) rng2 = Random(seed + 2) env2 = Craft(map_fn, rng2, 4, 1, objects=OBJECTS2, problem_mood=problem_mood) rng3 = Random(seed + 3) env3 = Craft(map_fn, rng3, 5, 2, objects=OBJECTS2, problem_mood=problem_mood) rng4 = Random(seed + 4) env4 = Craft(map_fn, rng4, 1, 3, objects=OBJECTS2, problem_mood=problem_mood) rng5 = Random(seed + 5) env5 = Craft(map_fn, rng5, 1, 5, objects=OBJECTS2, problem_mood=problem_mood) rng6 = Random(seed + 6) env6 = Craft(map_fn, rng6, 1, 5, objects=OBJECTS2, problem_mood=problem_mood) init1 = create_init([env1.get_all_item()[1]], [[1, 5]]) init2 = create_init(env2.get_all_item(), [[4, 1]]) init3 = create_init(env3.get_all_item(), [[5, 2]]) init4 = create_init(env4.get_all_item(), [[1, 3]]) init5 = create_init([env5.get_all_item()[1]], [[1, 5]]) init6 = create_init([env5.get_all_item()[1]], [[1, 5]]) tasks = [[OBJECTS2["played"]]] not_task = [OBJECTS2["doll"]] tasks = tasks[START_TASK:END_TASK + 1] with open(filename, "w") as csvfile: print("ql: begin experiment") report1 = SequenceReport(csvfile, LOG_STEP, init1, EPISODE_LENGTH, TRIALS) report2 = SequenceReport(csvfile, LOG_STEP, init2, EPISODE_LENGTH, TRIALS) report3 = SequenceReport(csvfile, LOG_STEP, init3, EPISODE_LENGTH, TRIALS) report4 = SequenceReport(csvfile, LOG_STEP, init4, EPISODE_LENGTH, TRIALS) report5 = SequenceReport(csvfile, LOG_STEP, init5, EPISODE_LENGTH, TRIALS) report6 = SequenceReport(csvfile, LOG_STEP, init6, EPISODE_LENGTH, TRIALS) for j, goal in enumerate(tasks): print("ql: begin task {}".format(j + START_TASK)) rng2.seed(seed + j) reward2 = ReachFacts(env2, goal, [], problem_mood) policy2 = EpsilonGreedy(alpha=1.0, gamma=1.0, epsilon=0.2, default_q=DEFAULT_Q, num_actions=4, rng=rng2) agent2 = Agent(env2, policy2, reward2, rng2) rng3.seed(seed + j) reward3 = ReachFacts(env3, goal, [], problem_mood) policy3 = EpsilonGreedy(alpha=1.0, gamma=1.0, epsilon=0.2, default_q=DEFAULT_Q, num_actions=4, rng=rng3) agent3 = Agent(env3, policy3, reward3, rng3) rng4.seed(seed + j) reward4 = ReachFacts(env4, goal, [], problem_mood) policy4 = EpsilonGreedy(alpha=1.0, gamma=1.0, epsilon=0.2, default_q=DEFAULT_Q, num_actions=4, rng=rng4) agent4 = Agent(env4, policy4, reward4, rng4) rng6.seed(seed + j) reward6 = ReachFacts(env6, goal, [], problem_mood) policy6 = EpsilonGreedy(alpha=1.0, gamma=1.0, epsilon=0.2, default_q=DEFAULT_Q, num_actions=4, rng=rng6) agent6 = Agent(env6, policy6, reward6, rng6) try: agent2.train(steps=TOTAL_STEPS1, steps_per_episode=EPISODE_LENGTH, report=report2) agent3.train(steps=TOTAL_STEPS1, steps_per_episode=EPISODE_LENGTH, report=report3) agent4.train(steps=TOTAL_STEPS1, steps_per_episode=EPISODE_LENGTH, report=report4) agent6.train(steps=TOTAL_STEPS1, steps_per_episode=EPISODE_LENGTH, report=report6) report1 = SequenceReport(csvfile, LOG_STEP, init1, EPISODE_LENGTH, TRIALS) reward1 = ReachFacts(env1, goal, not_task, problem_mood) policy1 = Empathic(alpha=1.0, gamma=1.0, epsilon=0.2, default_q=DEFAULT_Q, num_actions=5, rng=rng1, others_q=[ policy2.get_Q(), policy2.get_Q(), policy3.get_Q(), policy3.get_Q(), policy4.get_Q() ], others_init=[[4, 1], [4, 1], [5, 2], [5, 2], [1, 3]], others_dist=[0.2, 0.2, 0.2, 0.2, 0.2], penalty=-2 * EPISODE_LENGTH, others_alpha=[5.0, 5.0, 5.0, 5.0, 5.0], objects=OBJECTS2, problem_mood=problem_mood, caring_func=emp_func) agent1 = Agent(env1, policy1, 
reward1, rng1) agent1.train(steps=TOTAL_STEPS2, steps_per_episode=EPISODE_LENGTH, report=report1) reward5 = ReachFacts(env5, goal, not_task, problem_mood) policy5 = Empathic(alpha=1.0, gamma=1.0, epsilon=0.2, default_q=DEFAULT_Q, num_actions=5, rng=rng5, others_q=[policy6.get_Q()], others_init=[[1, 5]], others_dist=[1.0], penalty=-2 * EPISODE_LENGTH, others_alpha=[5.0], objects=OBJECTS2, problem_mood=problem_mood) agent5 = Agent(env5, policy5, reward5, rng5) if emp_func == "baseline": agent5.train(steps=TOTAL_STEPS2, steps_per_episode=EPISODE_LENGTH, report=report5) evaluate_agent(env5, policy5, reward5, init5) elif emp_func == "nonaug": evaluate_agent(env6, policy6, reward6, init6) else: evaluate_agent(env1, policy1, reward1, init1) except KeyboardInterrupt: logging.warning("ql: interrupted task %s after %s seconds!", j + START_TASK, end - start)
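
# A possible entry point for the doll experiment above (a hedged sketch; the
# file names and seed are hypothetical). The emp_func argument selects what is
# evaluated at the end: "baseline" trains and evaluates agent5, "nonaug"
# evaluates the plain Q-learner (agent6), and any other value is passed to the
# Empathic policy as its caring function and agent1 is evaluated ("sum" is the
# caring function used in the garden experiment below).
if __name__ == "__main__":
    for emp_func in ("baseline", "nonaug", "sum"):
        train("doll_{}.csv".format(emp_func), seed=2021, emp_func=emp_func)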
def train(filename, seed):
    problem_mood = 4
    here = path.dirname(__file__)
    map_fn = path.join(here, "craft/kitchen.map")

    rng1 = Random(seed + 1)
    env1 = Kitchen(map_fn, rng1, 1, 1, problem_mood=problem_mood)
    init1 = create_init([[1, 1]])
    rng4 = Random(seed + 4)
    env4 = Kitchen(map_fn, rng4, 1, 1, problem_mood=problem_mood)
    init4 = create_init([[1, 1]])
    rng2 = Random(seed + 2)
    env2 = Kitchen(map_fn, rng2, 1, 1, problem_mood=problem_mood, short=False)
    init2 = create_init([[1, 1]])
    rng3 = Random(seed + 3)
    env3 = Kitchen(map_fn, rng3, 1, 1, problem_mood=problem_mood, short=True)
    init3 = create_init([[1, 1]])

    tasks = [[OBJECTS4["food"]]]
    not_task = []
    tasks = tasks[START_TASK:END_TASK + 1]

    with open(filename, "w") as csvfile:
        print("ql: begin experiment")
        report2 = SequenceReport(csvfile, LOG_STEP, init2, EPISODE_LENGTH, TRIALS)
        report3 = SequenceReport(csvfile, LOG_STEP, init3, EPISODE_LENGTH, TRIALS)

        for j, goal in enumerate(tasks):
            print("ql: begin task {}".format(j + START_TASK))

            # Plain Q-learning agent; its Q-function is used as the "other"
            # agent's value estimate by the empathic policies below.
            rng2.seed(seed + j)
            reward2 = ReachFacts(env2, goal, [])
            policy2 = EpsilonGreedy(alpha=1.0, gamma=1.0, epsilon=0.2,
                                    default_q=DEFAULT_Q, num_actions=4, rng=rng2)
            agent2 = Agent(env2, policy2, reward2, rng2)

            rng3.seed(seed + j)
            reward3 = ReachFacts(env3, goal, [])
            policy3 = BaseEmpathic(alpha=1.0, gamma=1.0, epsilon=0.2,
                                   default_q=DEFAULT_Q, num_actions=5, rng=rng3,
                                   others_q=[policy2.get_Q()], others_init=[[1, 1]],
                                   others_dist=[1.0], penalty=-2 * EPISODE_LENGTH,
                                   others_alpha=[0.0], objects=OBJECTS4,
                                   problem_mood=problem_mood)
            agent3 = Agent(env3, policy3, reward3, rng3)

            try:
                start = time()
                agent2.train(steps=TOTAL_STEPS1, steps_per_episode=EPISODE_LENGTH, report=report2)
                agent3.train(steps=TOTAL_STEPS1, steps_per_episode=EPISODE_LENGTH, report=report3)

                report1 = SequenceReport(csvfile, LOG_STEP, init1, EPISODE_LENGTH, TRIALS)
                reward1 = ReachFacts(env1, goal, not_task)
                policy1 = BaseEmpathic(alpha=1.0, gamma=1.0, epsilon=0.2,
                                       default_q=DEFAULT_Q, num_actions=6, rng=rng1,
                                       others_q=[policy2.get_Q()], others_init=[[1, 1]],
                                       others_dist=[1.0], penalty=-2 * EPISODE_LENGTH,
                                       others_alpha=[0.1], objects=OBJECTS4,
                                       problem_mood=problem_mood)
                agent1 = Agent(env1, policy1, reward1, rng1)
                agent1.train(steps=TOTAL_STEPS2, steps_per_episode=EPISODE_LENGTH, report=report1)

                report4 = SequenceReport(csvfile, LOG_STEP, init4, EPISODE_LENGTH, TRIALS)
                reward4 = ReachFacts(env4, goal, not_task)
                policy4 = BaseEmpathic(alpha=1.0, gamma=1.0, epsilon=0.2,
                                       default_q=DEFAULT_Q, num_actions=6, rng=rng4,
                                       others_q=[policy3.get_Q()], others_init=[[1, 1]],
                                       others_dist=[1.0], penalty=-2 * EPISODE_LENGTH,
                                       others_alpha=[1.0], objects=OBJECTS4,
                                       problem_mood=problem_mood)
                agent4 = Agent(env4, policy4, reward4, rng4)
                agent4.train(steps=TOTAL_STEPS2, steps_per_episode=EPISODE_LENGTH, report=report4)

                # Compare each approach against the single-agent baseline.
                print("-----------------")
                print("The single agent in the env without considering others")
                base_step = evaluate_agent(env2, policy2, reward2, init2)
                print("Total Reward = ", base_step)
                print("-----------------")
                print("Baseline 1, Q-learning")
                step2 = evaluate_second_agent(env2, policy2, reward2, env3, policy3, reward3)
                print("Total Reward Compared to Base = ",
                      -1 * (step2[0] - base_step), -1 * (step2[1] - base_step))
                print("-----------------")
                print("Baseline 2, Krakovna et al.")
                step1 = evaluate_second_agent(env1, policy1, reward1, env3, policy3, reward3)
                print("Total Reward Compared to Base = ",
                      -1 * (step1[0] - base_step), -1 * (step1[1] - base_step))
                print("-----------------")
                print("Our Approach")
                step3 = evaluate_second_agent(env4, policy4, reward4, env3, policy3, reward3)
                print("Total Reward Compared to Base = ",
                      -1 * (step3[0] - base_step), -1 * (step3[1] - base_step))
                print("-----------------")
            except KeyboardInterrupt:
                end = time()
                logging.warning("ql: interrupted task %s after %s seconds!",
                                j + START_TASK, end - start)
def train(filename, seed, foldername, alpha_start, alpha_end):
    here = path.dirname(__file__)
    map_fn = path.join(here, "craft/map_seq.map")

    rng1 = Random(seed)
    env1 = Craft(map_fn, rng1, 1, 1, objects=OBJECTS1, problem_mood=problem_mood,
                 tool_in_fact=True, wood_in_fact=True)
    init = create_init(env1.get_all_item(), [[1, 1]], True)

    tasks = [[OBJECTS1["box"]]]
    not_task = []
    tasks = tasks[START_TASK:END_TASK + 1]

    with open(filename, "w") as csvfile:
        print("ql: begin experiment")
        report = SequenceReport(csvfile, LOG_STEP, init, EPISODE_LENGTH, TRIALS)

        for j, goal in enumerate(tasks):
            print("ql: begin task {}".format(j + START_TASK))
            rng1.seed(seed + j)
            reward1 = ReachFacts(env1, goal, not_task, problem_mood)
            policy1 = EpsilonGreedy(alpha=1.0, gamma=1.0, epsilon=0.5,
                                    default_q=DEFAULT_Q, num_actions=4, rng=rng1)
            agent = Agent(env1, policy1, reward1, rng1)

            try:
                start = time()
                agent.train(steps=TOTAL_STEPS1, steps_per_episode=EPISODE_LENGTH, report=report)

                for alpha in range(alpha_start, alpha_end):
                    print("alpha:", alpha / 100.0)
                    rng2 = Random(seed + 1)
                    env2 = Craft(map_fn, rng2, 1, 1, objects=OBJECTS1, problem_mood=problem_mood)
                    init2 = create_init(env2.get_all_item(), [[1, 1]])
                    report2 = SequenceReport(csvfile, LOG_STEP, init2, EPISODE_LENGTH, TRIALS)
                    reward2 = ReachFacts(env2, goal, not_task, problem_mood)
                    policy2 = Empathic(alpha=1.0, gamma=1.0, epsilon=0.5,
                                       default_q=DEFAULT_Q, num_actions=5, rng=rng2,
                                       others_q=[policy1.get_Q()],
                                       penalty=-2 * EPISODE_LENGTH,
                                       others_alpha=[alpha / 100.0],
                                       objects=OBJECTS1, problem_mood=problem_mood)
                    agent2 = Agent(env2, policy2, reward2, rng2)
                    agent2.train(steps=TOTAL_STEPS2, steps_per_episode=EPISODE_LENGTH, report=report2)
                    test(env2, policy2, reward2, env1, policy1, reward1, init, alpha, foldername)
            except KeyboardInterrupt:
                end = time()
                logging.warning("ql: interrupted task %s after %s seconds!",
                                j + START_TASK, end - start)
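
# A possible entry point for the sequential-task experiment above (a hedged
# sketch; the file name, folder name, seed, and range are hypothetical).
# alpha_start and alpha_end are integers interpreted as percentages: the loop
# in train() divides each value by 100.0 before passing it as others_alpha.
if __name__ == "__main__":
    # Sweeps the caring coefficient from 0.00 up to 1.00 in steps of 0.01.
    train("map_seq.csv", seed=2021, foldername="results_seq",
          alpha_start=0, alpha_end=101)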
def train(filename, seed, alpha2): here = path.dirname(__file__) map_fn = path.join(here, "craft/garden.map") rng1 = Random(seed + 1) env1 = Craft(map_fn, rng1, 1, 5, objects=OBJECTS3, problem_mood=problem_mood) rng2 = Random(seed + 2) env2 = Craft(map_fn, rng2, 1, 5, objects=OBJECTS3, problem_mood=problem_mood, fence=True) init1 = create_init(env1.get_all_item(), [[1, 5]]) init2 = create_init(env1.get_all_item(), [[1, 5]], True) tasks = [[OBJECTS3["target"]]] not_task = [] tasks = tasks[START_TASK:END_TASK + 1] with open(filename, "w") as csvfile: print("ql: begin experiment") report1 = SequenceReport(csvfile, LOG_STEP, init1, EPISODE_LENGTH, TRIALS) report2 = SequenceReport(csvfile, LOG_STEP, init2, EPISODE_LENGTH, TRIALS) for j, goal in enumerate(tasks): print("ql: begin task {}".format(j + START_TASK)) rng2.seed(seed + j) reward2 = ReachFacts(env2, goal, [], problem_mood) policy2 = EpsilonGreedy(alpha=1.0, gamma=1.0, epsilon=0.2, default_q=DEFAULT_Q, num_actions=4, rng=rng2) agent2 = Agent(env2, policy2, reward2, rng2) try: start = time() agent2.train(steps=TOTAL_STEPS1, steps_per_episode=EPISODE_LENGTH, report=report2) reward1 = ReachFacts(env1, goal, not_task, problem_mood) policy1 = Empathic(alpha=1.0, gamma=1.0, epsilon=0.2, default_q=DEFAULT_Q, num_actions=5, rng=rng1, others_q=[policy2.get_Q(), policy2.get_Q()], others_init=[[1, 5], [1, 5]], others_dist=[0.5, 0.5], penalty=-2 * EPISODE_LENGTH, others_alpha=[alpha2, 1.0], objects=OBJECTS3, problem_mood=problem_mood, caring_func="sum", restricted=[[2, 1], []]) agent1 = Agent(env1, policy1, reward1, rng1) agent1.train(steps=TOTAL_STEPS2, steps_per_episode=EPISODE_LENGTH, report=report1) evaluate_agent(env1, policy1, reward1, init1) except KeyboardInterrupt: end = time() logging.warning("ql: interrupted task %s after %s seconds!", j + START_TASK, end - start)