Example #1
    def post_connect(self, agent_id, entities, config):
        self.entity_id = agent_id
        self.world_model = WorldModel()
        self.world_model.add_entities(entities)
        # TODO: check whether we need to merge the agent config with the config coming from the kernel
        self.config = config
        print('post_connect agent_id:' + str(agent_id.get_value()))
Example #2
def run_simulated_mission(model, display=None, use_delays=False):
    print("Simulated mission running.")

    world_model = WorldModel(BLUEPRINT, CONFIG_FILE, simulated=True)
    ticks_left = 5 * MAX_EPISODE_TIME  # the mission advances 5 simulation ticks per unit of episode time
    total_reward = 0
    current_r = 0

    while (ticks_left > 0 and world_model.is_mission_running()):
        ticks_left -= 1
        current_r = world_model.reward()
        action = model.act(current_r, world_model.get_observation())
        if display is not None:
            display.update(world_model)
        total_reward += current_r
        world_model.simulate(action)
        if use_delays:
            print(action)
            time.sleep(ACTION_DELAY)

    # Collect the last reward, pass it to the model, then end the mission
    current_r = world_model.reward()
    model.act(current_r, world_model.get_observation())
    total_reward += current_r
    model.mission_ended()
    print("Simulated mission ended")

    return total_reward, (MAX_EPISODE_TIME - (ticks_left / 5))
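The function above expects a `model` object that exposes `act(reward, observation)` returning an action and `mission_ended()`. A minimal sketch of driving it, assuming a hypothetical random-action model (the `RandomModel` class, its action strings, and the episode count are illustrative and not part of the original code):

import random

class RandomModel:
    """Hypothetical stand-in: ignores rewards and picks a random action each tick."""
    ACTIONS = ["move 1", "turn 1", "turn -1"]  # illustrative action strings

    def act(self, reward, observation):
        # A learning model would update itself from (reward, observation) here.
        return random.choice(self.ACTIONS)

    def mission_ended(self):
        pass  # a learning model would finalize its episode here


if __name__ == '__main__':
    model = RandomModel()
    for episode in range(3):
        reward, length = run_simulated_mission(model, use_delays=False)
        print("episode {}: reward={}, length={}".format(episode, reward, length))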
Example #3
    def connect(self, host, port, teamname, version='15.5'):
        """
        Gives us a connection to the server as one player on a team.  This
        immediately connects the agent to the server and starts receiving and
        parsing the information it sends.
        """

        # if already connected, raise an error since user may have wanted to
        # connect again to a different server.
        if self.__connected:
            msg = "Cannot connect while already connected, disconnect first."
            raise sp_exceptions.AgentConnectionStateError(msg)

        # the pipe through which all of our communication takes place
        self.__sock = sock.Socket(host, port)

        # our models of the world and our body
        self.wm = WorldModel(handler.ActionHandler(self.__sock, self.teamname))

        # set the team name of the world model to the given name
        self.wm.teamname = teamname

        # handles all messages received from the server
        self.msg_handler = handler.MessageHandler(self.wm, self.teamname)

        self.action_handler = handler.ActionHandler(port, self.teamname)

        # set up our threaded message receiving system
        self.__parsing = True  # tell thread that we're currently running
        self.__msg_thread = threading.Thread(target=self.__message_loop,
                                             name="message_loop")
        self.__msg_thread.daemon = True  # dies when parent thread dies

        # start processing received messages. this will catch the initial server
        # response and all subsequent communication.
        self.__msg_thread.start()

        # send the init message and allow the message handler to handle further
        # responses.
        init_address = self.__sock.address
        init_msg = "( init %s ( version %s ) )" % (teamname, version)
        self.__sock.send(init_msg)
        print(init_msg)

        # wait until the socket receives a response from the server and gets its
        # assigned port.
        while self.__sock.address == init_address:
            time.sleep(0.0001)

        # create our thinking thread.  this will perform the actions necessary
        # to play a game of robo-soccer.
        self.__thinking = False
        self.__think_thread = threading.Thread(target=self.__think_loop,
                                               name="think_loop")
        self.__think_thread.daemon = True

        # set connected state.  done last to prevent state inconsistency if
        # something goes wrong beforehand.
        self.__connected = True
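A hedged usage sketch for the `connect` method above, assuming the enclosing class is named `Agent` and that a RoboCup soccer server is listening locally on port 6000 (the class name, host, and port are assumptions for illustration):

if __name__ == '__main__':
    agent = Agent()  # hypothetical name of the class that defines connect()
    agent.connect("localhost", 6000, "MyTeam", version="15.5")
    # The message loop now runs in a daemon thread; the think thread is
    # created here but presumably started by another method once play begins.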
Example #4
    async def setup(self):
        print(
            f"--- arm_agent: PeriodicSenderAgent started at {datetime.datetime.now().time()}"
        )
        init_world_model = WorldModel()
        start_at = datetime.datetime.now() + datetime.timedelta(
            seconds=init_world_model.current_world_model.init_delay_seconds["arm"])
        bdi_behaviour = self.BDIBehaviour(
            period=init_world_model.current_world_model.real_time_clock_period_seconds["arm"],
            start_at=start_at)
        self.add_behaviour(bdi_behaviour)
Example #5
        async def on_start(self):
            self.terminate = False
            self.SUCCESS = False
            self.verbose = False

            # Initialization
            self.beliefs = WorldModel()  # B := B0; Initial Beliefs
            self.goals = self.beliefs.current_world_model.goals
            self.intentions = self.beliefs.current_world_model.goals  # I := I0; Initial Intentions
            self.htn_planner = HierarchicalTaskNetworkPlanner(self.beliefs)
            self.perception = Perception(self.beliefs)
            self.coordination = Coordination(self.beliefs)
            self.monitoring = Monitoring()

            self.what, self.why, self.how_well, self.what_else, self.why_failed = "", "", "", "", ""
            self.plans = []
            self.selected_plan = []
            self.percept = {}
            self.action = ""
            self.start_time = datetime.datetime.now()
Example #6
def run_simulated_mission(model, mission, cfg, demo=False):
    print("Simulated mission running.")

    world_model = WorldModel(mission.blueprint,
                             cfg,
                             simulated=True,
                             agent_pos=mission.start_position)
    ticks_left = 5 * mission.max_episode_time  # 5 simulation ticks per unit of episode time
    total_reward = 0
    current_r = 0
    use_delays = mission.action_delay > 0

    while (ticks_left > 0 and world_model.is_mission_running()):
        ticks_left -= 1
        current_r = world_model.reward()
        if demo:
            action = model.demo_act(world_model.get_observation())
        else:
            action = model.act(current_r, world_model.get_observation())
        if mission.display is not None:
            mission.display.update(world_model)
        total_reward += current_r
        world_model.simulate(action)
        if use_delays:
            print(action)
            time.sleep(mission.action_delay)

    # Collect the last reward, pass it to the model, then end the mission
    if mission.display is not None:
        mission.display.update(world_model)
    current_r = world_model.reward()
    if not demo:
        model.act(current_r, world_model.get_observation())
    total_reward += current_r
    model.mission_ended()
    print("Simulated mission ended")

    return MissionStats(reward=total_reward,
                        length=(mission.max_episode_time - (ticks_left / 5)))
Example #7
        async def on_start(self):

            self.terminate = False
            self.SUCCESS = False
            self.verbose = False
            # Initialization
            self.htn_planner = HierarchicalTaskNetworkPlanner()
            self.goal = [('transfer_target_object_to_container', 'arm',
                          'target_object', 'table', 'container')]
            self.intentions = self.goal  # I := I0; Initial Intentions
            self.beliefs = WorldModel()  # B := B0; Initial Beliefs
            self.monitoring = Monitoring()
            self.perception = Perception()
            self.coordination = Coordination()

            # Coordination switches for testing: send_requests and
            # detect_last_position enabled, center_init disabled
            self.coordination.control.send_requests = True
            self.coordination.control.center_init = False
            self.coordination.control.detect_last_position = True

            self.what, self.why, self.how_well, self.what_else, self.why_failed = "", "", "", "", ""
            self.plans = []
            self.start_time = datetime.datetime.now()
Example #8
def run_mission(model, display=None):

    # Create default Malmo objects:
    my_mission = MalmoPython.MissionSpec(MISSION_XML, True)
    my_mission_record = MalmoPython.MissionRecordSpec()
    world_model = WorldModel(BLUEPRINT, CONFIG_FILE, simulated=False)
    # Attempt to start a mission:
    for retry in range(MAX_RETRIES):
        try:
            AGENT_HOST.startMission(my_mission, my_mission_record)
            break
        except RuntimeError as e:
            if retry == MAX_RETRIES - 1:
                print("Error starting mission:", e)
                exit(1)
            else:
                time.sleep(2**retry)

    # Loop until mission starts:
    print("Waiting for the mission to start ", end=' ')
    world_state = AGENT_HOST.getWorldState()
    while not world_state.has_mission_begun:
        print(".", end="")
        time.sleep(0.1)
        world_state = AGENT_HOST.getWorldState()
        for error in world_state.errors:
            print("Error:", error.text)

    print("\nMission running.")

    total_reward = 0
    current_r = 0

    start = time.time()
    # Loop until mission ends
    while (world_state.is_mission_running
           and world_model.is_mission_running()):
        world_state = AGENT_HOST.getWorldState()
        for error in world_state.errors:
            print("Error:", error.text)
        current_r += sum(r.getValue() for r in world_state.rewards)
        if len(world_state.observations) > 0:
            raw_obs = json.loads(world_state.observations[-1].text)
            world_model.update(raw_obs)
            current_r += world_model.reward()
            action = model.act(current_r, world_model.get_observation())
            if display is not None:
                display.update(world_model)
            total_reward += current_r
            current_r = 0
            if (world_model.mission_complete()
                    or not world_model.agent_in_arena()):
                AGENT_HOST.sendCommand('quit')
            elif world_state.is_mission_running:
                AGENT_HOST.sendCommand(action)
        time.sleep(ACTION_DELAY)
    end = time.time()

    model.mission_ended()

    print()
    print("Mission ended")

    return total_reward, end - start
Example #9
                               goal,
                               verbose=self.verbose,
                               all_plans=True,
                               sort_asc=True)
        else:
            return ""


if __name__ == '__main__':

    htn_planner = HierarchicalTaskNetworkPlanner()
    end_goal = [('transfer_target_object_to_container', 'arm', 'target_object',
                 'table', 'container')]
    intentions = end_goal  # I := I0; Initial Intentions
    from world_model import WorldModel
    beliefs = WorldModel()  # B := B0; Initial Beliefs

    print()
    beliefs.current_world_model.xyz["target_object"] = [-10, -10, 0]
    htn_plans = htn_planner.get_plans(
        beliefs.current_world_model,
        intentions)  # π := plan(B, I); MEANS_END REASONING
    if not htn_plans:
        print("-- No valid plan. Failure_reason: {}".format(
            htn_planner.failure_reason))
    else:
        beliefs.current_world_model.plans = htn_plans
        print("== Best current_world_model.plan: ",
              beliefs.current_world_model.plans[0])

    print()
Example #10
        elif action == ('close_hand', ):
            action_successful = self.control.close_hand(
                world_model.size["object_side_length"])
        elif action == ('move_arm_above', 'container'):
            action_successful = self.control.move_arm_above_xyz(
                world_model.xyz["container"],
                world_model.location["servo_values"], 14)

        return action_successful


if __name__ == '__main__':

    # Sequence for testing
    from world_model import WorldModel
    current_world_model = WorldModel()
    coordination = Coordination(current_world_model)
    coordination.control.control_world_model["send_requests"] = False
    coordination.control.control_world_model["center_init"] = False
    coordination.control.control_world_model["detect_last_position"] = False
    coordination.execute_action(('initialize', 'arm'),
                                current_world_model.current_world_model)
    coordination.execute_action(('open_hand', ),
                                current_world_model.current_world_model)
    coordination.execute_action(('move_arm_above', 'target_object'),
                                current_world_model.current_world_model)
    coordination.execute_action(('move_arm', 'target_object'),
                                current_world_model.current_world_model)
    coordination.execute_action(('close_hand', ),
                                current_world_model.current_world_model)
    coordination.execute_action(('move_arm_up', 'target_object'),
Example #11
def run_malmo_mission(model,
                      mission,
                      mission_xml,
                      cfg,
                      agent_host,
                      max_retries=5,
                      demo=False):
    # Create default Malmo objects:
    my_mission = MalmoPython.MissionSpec(mission_xml, True)
    my_mission_record = MalmoPython.MissionRecordSpec()
    world_model = WorldModel(mission.blueprint, cfg, simulated=False)
    # Attempt to start a mission:
    for retry in range(max_retries):
        try:
            agent_host.startMission(my_mission, my_mission_record)
            break
        except RuntimeError as e:
            if retry == max_retries - 1:
                print("Error starting mission:", e)
                exit(1)
            else:
                time.sleep(2**retry)

    # Loop until mission starts:
    print("Waiting for the mission to start ", end=' ')
    world_state = agent_host.getWorldState()
    while not world_state.has_mission_begun:
        print(".", end="")
        time.sleep(0.1)
        world_state = agent_host.getWorldState()
        time.sleep(1)
        for error in world_state.errors:
            print("Error:", error.text)

    print("\nMission running.")

    total_reward = 0
    current_r = 0

    start = time.time()
    # Loop until mission ends
    while (world_state.is_mission_running
           and world_model.is_mission_running()):
        world_state = agent_host.getWorldState()
        for error in world_state.errors:
            print("Error:", error.text)
        current_r += sum(r.getValue() for r in world_state.rewards)
        if len(world_state.observations) > 0:
            raw_obs = json.loads(world_state.observations[-1].text)
            world_model.update(raw_obs)
            current_r += world_model.reward()
            if demo:
                action = model.demo_act(world_model.get_observation())
            else:
                action = model.act(current_r, world_model.get_observation())
            if mission.display is not None:
                mission.display.update(world_model)
            total_reward += current_r
            current_r = 0
            if (world_model.mission_complete()
                    or not world_model.agent_in_arena()):
                agent_host.sendCommand('quit')
            elif world_state.is_mission_running:
                agent_host.sendCommand(action)
                if demo:
                    print(action)
        time.sleep(mission.action_delay)
    end = time.time()

    model.mission_ended()

    print()
    print("Mission ended")

    return MissionStats(reward=total_reward, length=end - start)
Example #12
                    input_world_model.current_world_model.distance = percept[
                        "distance"]
                elif key == "location":
                    for key2 in percept["location"]:
                        if key2 == "servo_values":
                            input_world_model.current_world_model.location["servo_values"] = \
                                percept["location"]["servo_values"]

        return input_world_model


if __name__ == '__main__':

    # Sequence for testing
    from world_model import WorldModel
    world_model = WorldModel()
    perception = Perception(world_model)
    perception.perception_world_model["write_video"] = False
    import time
    beliefs = WorldModel()

    time.sleep(0.1)
    current_percept = {"xyz": {'target_object': [15, 15, 0]}}
    beliefs.update_tick()
    beliefs = perception.belief_revision(beliefs, current_percept)

    time.sleep(0.1)
    current_percept = {"xyz": {'target_object': [14, 16, 0]}}
    beliefs.update_tick()
    beliefs = perception.belief_revision(beliefs, current_percept)
Example #13
NUM_HALLUCINATIONS = 200
NUM_ROLLOUTS = 10
MAX_ENV_STEPS = 100_000

env = gym.make('Breakout-v0')
writer = SummaryWriter()
resize = torchvision.transforms.Resize((42, 32))

agent = CSPN_A2C(breakout_test.LATENT_DIM,
                 breakout_test.ACTION_DIM,
                 breakout_test.PolicyNetwork(),
                 breakout_test.ValueNetwork(),
                 continuous=breakout_test.CONTINOUS)
autoencoder = breakout_test.AutoEncoder(42, 32)
world_model_cspn = breakout_test.ForwardModelCSPN()
world_model = WorldModel(breakout_test.LATENT_DIM, 1, autoencoder.encode,
                         autoencoder.decode, autoencoder, world_model_cspn)

for epoch in range(NUM_EPOCHS):
    print("AT EPOCH:", epoch)

    starting_obs = []
    buffer = ExperienceReplayBuffer()
    for rollout_idx in range(NUM_ROLLOUTS):
        print("EPISODE #", rollout_idx)
        obs = env.reset()

        # Preprocess: channels-first, resize to 42x32, add a batch dimension, scale to [0, 1]
        obs = np.rollaxis(obs, 2, 0)
        obs = resize(torch.Tensor(obs))
        obs = obs.unsqueeze(0) / 255.0
        episode = []
        starting_obs.append(obs)
Example #14
                elif key == "location":  # TODO: if xyz of object within limits -> on table else -> not on table
                    for key2 in percept["location"]:
                        if key2 == "target_object":
                            world_model.current_world_model.location["target_object"] = percept["location"][
                                "target_object"]
                elif key == "initialized":  # TODO: if servos at 1500-ish -> initialized
                    world_model.current_world_model.initialized = percept["initialized"]

        return world_model


if __name__ == '__main__':

    # Sequence for testing
    from world_model import WorldModel
    beliefs = WorldModel()
    monitoring = Monitoring()

    time.sleep(0.1)
    current_percept = {"distance": {'distance_to_gripper': 8.2}}
    beliefs.update_tick()
    beliefs = monitoring.fire_events(beliefs, current_percept)

    time.sleep(0.1)
    current_percept = {"distance": {'distance_to_gripper': 5.2}}
    beliefs.update_tick()
    beliefs = monitoring.fire_events(beliefs, current_percept)

    time.sleep(0.1)
    current_percept = {"distance": {'distance_to_gripper': 2.2}}
    beliefs.update_tick()
Example #15
def main():

    with tf.Session() as sess:
        output_filename = "log.csv"

        model_learning_rate = 1e-2
        model_hidden_size = 256
        model_training_episodes_per_batch = 5
        model_training_batches_per_training = 100

        policy_learning_rate = 1e-2
        policy_hidden_size = 8
        policy_training_episodes_per_batch = 5
        policy_training_batches_per_training = 10
        policy_evaluation_episodes = 20

        evaluation_episodes = 10
        num_rounds = 100

        env = gym.make('CartPole-v0')
        state_space_size = env.observation_space.shape[0]
        action_space_size = env.action_space.n
    
        world_model = WorldModel(state_space_size, action_space_size, model_hidden_size)
        policy = Policy(sess, state_space_size, action_space_size, policy_hidden_size)

        start_state_buffer = CircularBuffer(20)
        state_initializer = lambda: start_state_buffer.get()

        sess.run(tf.global_variables_initializer())

        def make_episode_batch(env, policy, batch_size, max_length=None):
            """ Uses a black-box policy to generate an epsiode for training the model. """
            states_in = []
            states_out = []
            actions = []
            rewards = []
            dones = []

            for b in range(batch_size):
                states_in_this_ep = []
                states_out_this_ep = []
                actions_this_ep = []
                rewards_this_ep = []
                dones_this_ep = []

                s = env.reset()
                done = False
                length = 0
                while (not done) and (max_length is None or length < max_length):
                    length += 1
                    a = policy(s)
                    s1, reward, done, _ = env.step(a)

                    states_in_this_ep.append(s)
                    states_out_this_ep.append(s1)
                    actions_this_ep.append(a)
                    rewards_this_ep.append([reward])
                    dones_this_ep.append([1.0 if done else 0.0])

                    s = s1

                states_in_this_ep = np.stack(states_in_this_ep, axis=0)
                states_out_this_ep = np.stack(states_out_this_ep, axis=0)
                actions_this_ep = np.stack(actions_this_ep, axis=0)
                rewards_this_ep = np.stack(rewards_this_ep, axis=0)
                dones_this_ep = np.stack(dones_this_ep, axis=0)

                states_in.append(states_in_this_ep)
                states_out.append(states_out_this_ep)
                actions.append(actions_this_ep)
                rewards.append(rewards_this_ep)
                dones.append(dones_this_ep)

            return states_in, states_out, actions, rewards, dones
    

        output_logfile = open(output_filename, 'wt')
        output_logfile.write("epoch,model_state_mse,model_reward_mse,model_done_ce,policy_model_reward,policy_env_reward\n")

        for r in range(1, num_rounds+1):
            # Train the world model on episodes generated using the policy
            model_loss = [0.0, 0.0, 0.0, 0.0]
            for b in range(model_training_batches_per_training):
                states_in, states_out, actions, rewards, dones = make_episode_batch(env, policy.apply, model_training_episodes_per_batch)
                for start_state in [x[0] for x in states_in]:
                    start_state_buffer.put(start_state)
                this_loss = world_model.train_on_episodes(np.concatenate(states_in, axis=0),
                                                          np.concatenate(actions, axis=0),
                                                          np.concatenate(states_out, axis=0),
                                                          np.concatenate(rewards, axis=0),
                                                          np.concatenate(dones, axis=0), learning_rate=1e-4, sess=sess)
                model_loss = [x + this_loss[i] for (i, x) in enumerate(model_loss)]
            model_loss = [x / model_training_batches_per_training for x in model_loss]
            print("Model MSE: {}".format(model_loss))

            # Train the policy on the world model
            total_reward = 0.0
            for b in range(policy_training_batches_per_training):
                for ep in range(policy_training_episodes_per_batch):
                    total_reward += policy.run_episode_and_accumulate_gradients(world_model.env_analogue(sess, state_initializer=state_initializer))
                policy.apply_accumulated_gradients(policy_learning_rate)
            total_reward /= (policy_training_batches_per_training * policy_training_episodes_per_batch)
            print("Policy reward in model: {}".format(total_reward))

            # Evaluate the policy on the real environment
            evaluation_reward = 0.0
            for ep in range(policy_evaluation_episodes):
                evaluation_reward += policy.run_episode_and_accumulate_gradients(env)
            policy.clear_grad_buffers()
            evaluation_reward /= policy_evaluation_episodes
            print("Policy reward in real env: {}".format(evaluation_reward))

            output_logfile.write("{},{},{},{},{},{}\n".format(r, model_loss[1], model_loss[2], model_loss[3], total_reward, evaluation_reward))
            output_logfile.flush()

        output_logfile.close()