Example #1
def main(argv):
    # print_squidward()

    # FileManager: load specs used in experiment
    fm = FileManager()
    try:
        spec_summary = fm.load_spec_summary(FLAGS.specs)
        print(spec_summary)
        fm.change_cwd(spec_summary["ROOT_DIR"])
    except Exception:
        print("Loading specs/model failed. Have you selected the right path?")
        exit()
    fm.create_test_file()

    agent = setup_agent(spec_summary)
    agent.DQN.load(FLAGS.model)
    agent.set_testing_mode()

    # setup environment in testing mode
    env = gym.make("sc2-v0")
    obs, reward, done, info = env.setup(spec_summary, "testing")

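    # Test loop: query the agent for an action each step, forward it to the
    # environment and log the resulting test report until env.finished is set.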
    while True:
        # Action selection
        action = agent.policy(obs, reward, done, info)
        # print(action)

        if action == 'reset':  # Resetting the environment
            obs, reward, done, info = env.reset()
            # No saving of model in test_mode
        else:  # Performing selected action
            obs, reward, done, info = env.step(action)
            test_report = agent.evaluate(obs, reward, done, info)
            fm.log_test_reports(test_report)

        if env.finished:
            print("Finished testing.")
            break
Example #2
def main(argv):
	try:
		# FileManager: Save specs and create experiment
		fm = FileManager()
		try:
			fm.create_experiment(agent_specs["EXP_NAME"])  # Automatic cwd switch
			fm.save_specs(agent_specs, mv2beacon_specs)
		except Exception:
			print("Creating experiment or saving specs failed.")
			exit()
		fm.create_train_file()

		# Create HDRL agent
		agent = setup_agent(agent_specs)

		# Extract skills: load the pre-trained skill agents, pull out their
		# DQN networks and register them with the HDRL agent
		skill_name_list = ['move2beacon', 'collectmineralshards']

		skill_specs_list = fm.extract_skill_specs(skill_name_list)

		agent_list = setup_multiple_agents(skill_specs_list)
		move2beacon_agent = agent_list[0]
		collectmineralshards_agent = agent_list[1]
		# Load the pre-trained model weights into each skill agent
		move2beacon_agent.DQN.load(
			fm.main_dir + '/assets/skills/' + skill_name_list[0] + '/model.pt')
		collectmineralshards_agent.DQN.load(
			fm.main_dir + '/assets/skills/' + skill_name_list[1] + '/model.pt')
		move2beacon_skill = move2beacon_agent.DQN # Extract DQN skill network
		collectmineralshards_skill = collectmineralshards_agent.DQN
		agent.add_skill_list([move2beacon_skill, collectmineralshards_skill])
		print_ts("Code ran through")
		exit()  # Debug stop: the training loop below is not reached yet

		# Create environment
		env = gym.make("gym-sc2-m2b-v0")
		obs, reward, done, info = env.setup(mv2beacon_specs, "learning")

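		# Learning loop: a 'reset' action restarts the environment and triggers
		# a periodic model checkpoint; any other action steps the environment.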
		while True:
			# Action selection
			action = agent.policy(obs, reward, done, info)

			if action == 'reset':  # Resetting the environment
				obs, reward, done, info = env.reset()
				if agent.episodes % agent.model_save_period == 0:
					agent.save_model(fm.get_cwd())
			else:  # Performing selected action
				obs, reward, done, info = env.step(action)
				dict_agent_report = agent.evaluate(obs, reward, done, info)
				fm.log_training_reports(dict_agent_report)

			if env.finished:
				print("Finished learning.")
				break
	except KeyboardInterrupt:
		agent.save_model(fm.get_cwd(), emergency=True)
		exit()
Example #3
def main(argv):
    # print_squidward()

    try:
        # FileManager: Save specs and create experiment
        fm = FileManager()
        try:
            fm.create_experiment(
                agent_specs["EXP_NAME"])  # Automatic cwd switch
            fm.save_specs(agent_specs, mv2beacon_specs)
        except Exception:
            print("Creating experiment or saving specs failed.")
            exit()
        fm.create_train_file()

        agent = setup_agent(agent_specs)
        # Loading a model is possible for training mode
        agent.set_supervised_mode()

        # setup environment in learning mode
        env = gym.make("gym-sc2-m2b-v0")

        obs, reward, done, info = env.setup(mv2beacon_specs, "learning")

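        # Learning loop: 'reset' restarts the environment and checkpoints the
        # model every MODEL_SAVE_PERIOD episodes; other actions step the env.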
        while True:
            # Action selection
            action = agent.policy(obs, reward, done, info)

            if action == 'reset':  # Resetting the environment
                obs, reward, done, info = env.reset()
                if agent.episodes % agent_specs['MODEL_SAVE_PERIOD'] == 0:
                    agent.save_model(fm.get_cwd())
            else:  # Performing selected action
                obs, reward, done, info = env.step(action)
                dict_agent_report = agent.evaluate(obs, reward, done, info)
                fm.log_training_reports(dict_agent_report)

            if env.finished:
                print("Finished learning.")
                break
    except KeyboardInterrupt:
        agent.save_model(fm.get_cwd(), emergency=True)
        exit()
Example #4
def main(argv):
    # print_squidward()

    # FileManager: Save specs and create experiment
    fm = FileManager()
    try:
        fm.create_experiment(agent_specs["EXP_NAME"])  # Automatic cwd switch
        fm.save_specs(agent_specs, cartpole_specs)
    except Exception:
        print("Creating experiment or saving specs failed.")
        exit()
    fm.create_train_file()

    # show_extracted_screen(get_screen(env))
    plotter = Plotter()

    # No FileManager yet
    agent = CartPoleAgent(agent_specs)
    agent.set_learning_mode()

    env = gym.make('CartPole-v0').unwrapped

    num_episodes = cartpole_specs["EPISODES"]
    for e in range(num_episodes):

        # Initialize the environment and state
        # Classic gym's reset() returns only the observation; initialize the
        # remaining loop variables explicitly
        state = env.reset()
        reward, done, info = 0.0, False, {}
        last_screen = get_screen(env)
        current_screen = get_screen(env)
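        # Use the difference of two consecutive rendered frames as the state,
        # so the observation carries motion as well as position information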
        state = current_screen - last_screen

        if e % 50 == 0 and e != 0:
            agent.save_model(fm.get_cwd())

        for t in count():
            # Select and perform an action
            action = agent.policy(state, reward, done, info)
            _, reward, done, info = env.step(action)
            last_screen = current_screen
            current_screen = get_screen(env)
            if not done:
                next_state = current_screen - last_screen
            else:
                next_state = None
                agent.episodes += 1
                # agent.update_target_network()
                plotter.episode_durations.append(t + 1)
                plotter.plot_durations()

            print(e, t, reward, action, len(agent.DQN.memory), agent.epsilon)
            # print(agent.DQN.state_q_values)
            # Store the transition in memory and log the training report
            train_report = agent.evaluate(next_state, reward, done, info)
            fm.log_training_reports(train_report)

            # Move to the next state
            state = next_state

            if done:
                break

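        # Sync the target network with the online network once per episode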
        agent.update_target_network()

    agent.save_model(fm.get_cwd())
    print('Training complete')
    env.close()
    plotter.close()
Example #5
def main(argv):
    # print_squidward()

    # FileManager: Save specs and create experiment
    fm = FileManager()
    try:
        spec_summary = fm.load_spec_summary(FLAGS.specs)
        fm.change_cwd(spec_summary["ROOT_DIR"])
    except Exception:
        print("Loading specs/model failed. Have you selected the right path?")
        exit()
    fm.create_test_file()

    # show_extracted_screen(get_screen(env))
    plotter = Plotter()

    # No FileManager yet
    agent = CartPoleAgent(agent_specs)
    agent.DQN.load(FLAGS.model)
    agent.set_testing_mode()

    list_reward_per_episode = []

    env = gym.make('CartPole-v0').unwrapped

    num_episodes = int(spec_summary["TEST_EPISODES"])
    for e in range(num_episodes):

        # Initialize the environment and state
        # Classic gym's reset() returns only the observation; initialize the
        # remaining loop variables explicitly
        state = env.reset()
        reward, done, info = 0.0, False, {}
        last_screen = get_screen(env)
        current_screen = get_screen(env)
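        # As in training, the state is the difference of two rendered frames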
        state = current_screen - last_screen

        reward_cnt = 0
        try:
            print("Episode {} | Last reward: {}".format(
                e, list_reward_per_episode[-1]))
        except IndexError:  # No completed episode yet
            pass

        for t in count():
            # Select and perform an action
            action = agent.policy(state, reward, done, info)
            _, reward, done, info = env.step(action)
            reward_cnt += reward
            last_screen = current_screen
            current_screen = get_screen(env)
            if not done:
                next_state = current_screen - last_screen
            else:
                next_state = None
                agent.episodes += 1
                plotter.episode_durations.append(t + 1)
                plotter.plot_durations()

            # Evaluate the step and log the test report
            test_report = agent.evaluate(next_state, reward, done, info)
            fm.log_test_reports(test_report)

            # Move to the next state
            state = next_state

            if done:
                list_reward_per_episode.append(reward_cnt)
                dict_test_report = {
                    "RewardPerEpisode": list_reward_per_episode
                }
                fm.log_test_reports(dict_test_report)
                break

    print('Testing complete')
    env.close()
    plotter.close()