Example #1
def main():
    signal.signal(signal.SIGINT, terminate_program)

    # rob = robobo.HardwareRobobo(camera=True).connect(address="192.168.1.7")
    rob = robobo.SimulationRobobo(number='').connect(address='127.0.0.1',
                                                     port=19997)

    rob.play_simulation()

    rob.set_phone_tilt(0.8, 50)
    image = rob.get_image_front()
    print(blob_detection.detect(image))
    cv2.imwrite("test_pictures.png", image)
    blob_detection.detect(image)
    exit()
    # Following code moves the robot
    for i in range(10):
        print("robobo is at {}".format(rob.position()))
        rob.move(75 + 10 * i, 75 + 10 * i, 200)

    rob.stop_world()
    return
    print("robobo is at {}".format(rob.position()))
    rob.sleep(1)

    # Following code moves the phone stand
    rob.set_phone_pan(343, 100)
    time.sleep(1)
    rob.set_phone_pan(11, 100)
    rob.set_phone_tilt(26, 100)

    # Following code makes the robot talk and be emotional
    rob.set_emotion('happy')
    rob.talk('Hi, my name is Robobo')
    rob.sleep(1)
    rob.set_emotion('sad')

    # Following code gets an image from the camera
    image = rob.get_image_front()
    cv2.imwrite("test_pictures.png", image)

    time.sleep(0.1)

    # IR reading
    for i in range(1000000):
        # print("ROB Irs: {}".format(np.log(np.array(rob.read_irs()))/10))
        print("ROB Irs: {}".format(np.array(rob.read_irs()) / 10))
        time.sleep(0.1)

    # pause the simulation and read the collected food
    rob.pause_simulation()

    # Stopping the simulation resets the environment
    rob.stop_world()
    def train(self, n_episodes, max_steps, filename = None, shuffle = False):
        data = []
        for _ in range(n_episodes):
            print(f'Episode {_}')
            robbo_num = None
            if shuffle:
                self.rob.disconnect()
                robbo_num = _ % 3
                if robbo_num != 1:
                    robbo_num = f'#{robbo_num}'
                else:
                    robbo_num = ''
                # print(robbo_num)
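                # with shuffling enabled, episodes cycle through the simulator
                # instance names '#0', '' (the unsuffixed default) and '#2'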
                self.rob = robobo.SimulationRobobo(robbo_num).connect(address='127.0.0.1', port=19997)
            self.initEnv()
            first_move = self.generateRandomMove()
            self.executeMove(first_move)
            # print(first_move,self.current_state, self.current_reward)
            
            for i in range(max_steps):

                next_move = self.generateMoveFromPolicy()
                self.executeMove(next_move)
                print(self.current_action, self.current_state, self.current_reward, self.stuck_count, self.full_stuck)

            df = pd.DataFrame(self.previous_data)
            df['sensor_max'] = df['sensor_data'].apply(lambda x: np.max(x))
            df['sensor_mean'] = df['sensor_data'].apply(lambda x: np.mean(x))
            _dict = {}
            _dict['total_reward'] = sum(self.reward_history)
            _dict['stuck_count'] = self.stuck_count
            _dict['sensor_mean'] = np.mean(df['sensor_mean'].values)
            _dict['sensor_max'] = np.mean(df['sensor_max'].values)
            _dict['robbo'] = robbo_num
            _dict['full_stuck_count'] = self.full_stuck
            print(_dict)
            data.append(_dict)
        self.rob.stop_world()
        
        if filename:
            with open(filename, 'wb') as f:
                print('Training Finished')
                np.save(f, self.q_table)
                print('Q table saved: ', filename)
            df = pd.DataFrame(data)
            filename = filename.split('.')[0] + '_train.csv'
            df.to_csv(filename, index=False)
            print('data saved at ', filename)
def main():
    signal.signal(signal.SIGINT, terminate_program)

    # rob = robobo.HardwareRobobo(camera=True).connect(address="192.168.1.7")
    global rob
    rob = robobo.SimulationRobobo().connect(address="127.0.0.1", port=19997)

    try:
        simulation(rob)
    except Exception as e:
        cv2.imshow("img", rob.get_image_front())
        cv2.waitKey(0)
        cv2.destroyAllWindows()
        print(e)
        traceback.print_exc()
        rob.pause_simulation()
Example #4
def main():
    signal.signal(signal.SIGINT, terminate_program)

    # rob = robobo.HardwareRobobo(camera=True).connect(address="192.168.1.5")
    rob = robobo.SimulationRobobo().connect(address='192.168.1.10', port=19997)

    rob.play_simulation()
    # rob.pause_simulation()

    # move and talk
    # rob.set_emotion('sad')

    print("robobo is at {}".format(rob.position()))
    rob.move(5, 5, 2000)
    print("robobo is at {}".format(rob.position()))

    # # Following code moves the phone stand
    # rob.set_phone_pan(343, 100)
    # rob.set_phone_tilt(109, 100)
    # time.sleep(1)
    # rob.set_phone_pan(11, 100)
    # rob.set_phone_tilt(26, 100)

    # rob.talk('Hi, my name is Robobo')
    # rob.sleep(1)
    # rob.set_emotion('happy')

    # Following code gets an image from the camera
    # image = rob.get_image_front()
    # cv2.imwrite("test_pictures.png",image)

    # time.sleep(0.1)

    # # IR reading
    # for i in range(10):
    #     print("ROB Irs: {}".format(rob.read_irs()))
    #     time.sleep(0.1)

    # pause the simulation and read the collected food
    rob.pause_simulation()
    print("Robobo collected {} food".format(rob.collected_food()))

    # Stopping the simulation resets the environment
    rob.stop_world()
    def __init__(self, config):

        super(ForagingEnv, self).__init__()

        # params

        self.config = config

        self.max_food = 7
        self.food_reward = 10
        self.sight_reward = 0.1

        # init
        self.done = False
        self.total_success = 0
        self.current_step = 0
        self.duration = 0
        self.start_time = None
        self.exp_manager = None

        # Define action and sensors space
        self.action_space = spaces.Box(low=0,
                                       high=1,
                                       shape=(4, ),
                                       dtype=np.float32)
        # why high and low?
        self.observation_space = spaces.Box(low=0,
                                            high=1,
                                            shape=(5, ),
                                            dtype=np.float32)
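        # both spaces are normalized to [0, 1]: four continuous action values and
        # five observation values (presumably sensor/vision features scaled elsewhere)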

        self.action_selection = ActionSelection(self.config)

        self.robot = False
        while not self.robot:
            if self.config.sim_hard == 'sim':
                self.robot = robobo.SimulationRobobo().connect(
                    address=self.config.robot_ip, port=self.config.robot_port)
            else:
                self.robot = robobo.HardwareRobobo(camera=True).connect(
                    address=self.config.robot_ip_hard)

            time.sleep(1)
Example #6
def test():
    rob = robobo.SimulationRobobo().connect(address='127.0.0.1', port=19997)

    rob.play_simulation()

    # prey_robot = robobo.SimulationRoboboPrey().connect(address='127.0.0.1', port=19989)

    # prey_controller = prey.Prey(robot=prey_robot, level=2)

    # prey_controller.start()

    prey_controller, prey_robot = startPrey()

    for i in range(10):
        print("robobo is at {}".format(rob.position()))
        rob.move(5, 5, 2000)

    stopPrey(prey_controller, prey_robot)
    rob.stop_world()
def main():
    signal.signal(signal.SIGINT, terminate_program)

    rob = robobo.SimulationRobobo().connect(address='127.0.0.1', port=19997)

    rob.play_simulation()

    prey_robot = robobo.SimulationRoboboPrey().connect(address='127.0.0.1',
                                                       port=19989)

    prey_controller = prey.Prey(robot=prey_robot, level=2)

    prey_controller.start()

    for i in range(10):
        print("robobo is at {}".format(rob.position()))
        rob.move(5, 5, 2000)

    prey_controller.stop()
    prey_controller.join()
    prey_robot.disconnect()
    rob.stop_world()

    # time.sleep(10)
    # rob.kill_connections()
    # rob = robobo.SimulationRobobo().connect(address='192.168.1.6', port=19997)
    rob.play_simulation()
    # prey_robot = robobo.SimulationRoboboPrey().connect(address='192.168.1.71', port=19989)
    # prey_controller = prey.Prey(robot=prey_robot, level=2, hardware=True)
    prey_robot = robobo.SimulationRoboboPrey().connect(address='127.0.0.1',
                                                       port=19989)
    prey_controller = prey.Prey(robot=prey_robot, level=2)

    prey_controller.start()
    for i in range(10):
        print("robobo is at {}".format(rob.position()))
        rob.move(5, 5, 2000)
    prey_controller.stop()
    prey_controller.join()

    rob.stop_world()
 def __init__(self, actions, n_observations):
     """
     :param actions: list of actions. Each action is a four-tuple (left_speed, right_speed, duration, direction(-1=backwards, 1=forward))
     :param n_observations: number of sensors
     """
     self.rob = robobo.SimulationRobobo(info.client).connect(
         address=info.ip, port=19997)
     # using action and observation spaces of Gym to minimize code alterations.
     self.actions = actions
     self.action_space = Discrete(len(actions))
     self.rob.play_simulation()
     self.observations = self.get_sensor_observations()
     self.observation_space = Box(low=0.0,
                                  high=1.0,
                                  shape=(n_observations, ))
     self.time_per_episode = 20000  # 20 seconds
     self.time_passed = 0
     self.df = pd.DataFrame()
     self.v_measure_calc_distance = 0
     self.v_measure_sensor_distance = 0
     self.accu_reward = 0
     self.accu_v_measure_sensor_distance = 0
     self.episode_counter = 0
Example #9
NUMBER_OF_GENERATIONS = 20
NUMBER_OF_INPUTS = 8
NUMBER_OF_OUTPUTS = 2
NUMBER_OF_HIDDEN_Neurons_1 = 2
NUMBER_OF_HIDDEN_Neurons_2 = 2
NUMBER_OF_WEIGHTS = (NUMBER_OF_INPUTS + 1) * NUMBER_OF_HIDDEN_Neurons_1 + (
    NUMBER_OF_HIDDEN_Neurons_1 + 1) * NUMBER_OF_HIDDEN_Neurons_2 + (
        NUMBER_OF_HIDDEN_Neurons_2 + 1) * NUMBER_OF_OUTPUTS
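# parameter count of a fully connected 8-2-2-2 network: each layer contributes
# (inputs + 1) * neurons parameters, the "+ 1" being one bias per neuron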
NUMBER_OF_SIGMAS = NUMBER_OF_WEIGHTS

MU = 10  # how many parents per generation
LAMBDA = 30  # how many children per generation
LOWER = NUMBER_OF_WEIGHTS * [-1]
UPPER = NUMBER_OF_WEIGHTS * [1]

rob2 = robobo.SimulationRobobo(number='#0').connect(address='127.0.0.1',
                                                    port=19997)
rob = robobo.SimulationRobobo(number='#0').connect(address='127.0.0.1',
                                                   port=19998)


def fitness(c, weights):
    fitnessScore = 0
    number_of_collisions = 0
    controller = c(weights)
    while (rob.is_simulation_running()):
        pass
    rob.play_simulation()
    start = rob.get_sim_time()
    prior_irs = 'None'
    min2_irs = 'None'
    min3_irs = 'None'
Example #10
#!/usr/bin/env python2
from __future__ import print_function
import time
import robobo
import cv2

if __name__ == "__main__":
    robs = [
        # robobo.HardwareRobobo(camera=True).connect(address="192.168.1.66"),
        robobo.SimulationRobobo().connect(address='192.168.1.135', port=19997),
    ]

    robs[0].play_simulation()
    # robs[0].pause_simulation()
    
    # move and talk
    for i, rob in enumerate(robs):
        print('sending commands to robot {}'.format(i))
        rob.set_emotion('sad')
        
        print("robobo is at {}".format(rob.position()))
        rob.move(10, 10, 2000)
        print("robobo is at {}".format(rob.position()))

        ## Following code moves the phone stand
        # rob.set_phone_pan(343, 100)
        # rob.set_phone_tilt(109, 100)
        # time.sleep(1)
        # rob.set_phone_pan(11, 100)
        # rob.set_phone_tilt(26, 100)
def fitness(controller):
    signal.signal(signal.SIGINT, terminate_program)

    # rob = robobo.HardwareRobobo(camera=True).connect(address="192.168.2.28")
    rob = robobo.SimulationRobobo().connect(address='127.0.0.1', port=19997)

    rob.play_simulation()

    # Following code moves the robot
    with open('output.csv', 'w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(
            ['time', 'x', 'y', 'z', 'sensor1', 'sensor2', 'sensor3', 'sensor4', 'sensor5', 'sensor6', 'sensor7', 'sensor8'])

        rob.play_simulation()

        end = rob.get_sim_time() + 60 * 1000
        now = rob.get_sim_time()

        while now < end:
            position = rob.position()
            irs = np.array(rob.read_irs())

            now = rob.get_sim_time()

            file.write("{},{},{},{},{},{},{},{},{},{},{},{}\n".format(
                now, position[0], position[1],
                position[2], irs[0], irs[1], irs[2],
                irs[3], irs[4], irs[5],
                irs[6], irs[7]))
            # writer.writerow(time() + rob.position() + np.array(rob.read_irs()))
            left, right = controller.act(irs)
            rob.move(left, right, 2000)

    dataframe = pandas.read_csv('output.csv', sep=',')
    xval = dataframe.x.copy()[1:]
    yval = dataframe.y.copy()[1:]
    zval = dataframe.z.copy()[1:]

    dataframe["Euclid"] = 0
    dataframe["Euclid"] = np.sqrt(
        (dataframe.x[0:(len(dataframe.x) - 1)] - np.array(xval)) ** 2 +
        (dataframe.y[0:(len(dataframe.y) - 1)] - np.array(yval)) ** 2 +
        (dataframe.z[0:(len(dataframe.z) - 1)] - np.array(zval)) ** 2
    )
    dataframe["Euclidean"] = 0
    dataframe["Euclidean"][1:] = dataframe.Euclid[0:(len(dataframe.Euclid) - 1)].cumsum()
    dataframe["Rel_Diff"] = 'False'
    dataframe["Rel_Diff"][dataframe.sensor4[dataframe.sensor4 != 'False'].index] = 0.2 * (
                dataframe.sensor4[dataframe.sensor4 != 'False'].astype(float) + dataframe.sensor5[
            dataframe.sensor5 != 'False'].astype(float) + dataframe.sensor6[dataframe.sensor6 != 'False'].astype(
            float) + dataframe.sensor7[dataframe.sensor7 != 'False'].astype(float) + dataframe.sensor8[
                    dataframe.sensor8 != 'False'].astype(float))
    dataframe["Rel_Diff"][dataframe.Rel_Diff == 'False'] = 100
    dataframe["Rel_Diff"] = dataframe.Rel_Diff.astype(float).diff()
    dataframe["Rel_Diff"][dataframe.Rel_Diff.isnull() == True] = 0
    counter = 0
    for j in range(len(dataframe["Rel_Diff"])):
        if j < (len(dataframe["Rel_Diff"]) - 4):
            if np.abs(dataframe.Rel_Diff[j]) > 0.03 and np.abs(dataframe.Rel_Diff.diff()[j + 2]) < 0.01 and np.abs(
                    dataframe.Rel_Diff.diff()[j + 4]) < 0.001:
                counter += 1
                print(j)
    a = dataframe.Rel_Diff.copy()
    round(a[0:18], 6)
    round(a.diff()[0:20], 6)
    fitness = coef1 * dataframe.Euclidean[len(dataframe.Euclidean) - 1] + (coef2 / (counter + 1))
    # Following code moves the phone stand
    # rob.set_phone_pan(343, 100)
    # rob.set_phone_tilt(109, 100)
    # # time.sleep(1)
    # rob.set_phone_pan(11, 100)
    # rob.set_phone_tilt(26, 100)

    # Following code makes the robot talk and be emotional
    # rob.set_emotion('happy')
    # rob.talk('Hi, my name is Robobo')
    # rob.sleep(1)
    # rob.set_emotion('sad')

    # Following code gets an image from the camera
    image = rob.get_image_front()
    cv2.imwrite("test_pictures.png", image)

    time.sleep(0.1)

    # IR reading
    # for i in range(100):
    #     print("ROB Irs: {}".format(np.log(np.array(rob.read_irs())) / 10))
    #     time.sleep(0.1)

    # pause the simulation and read the collected food
    rob.pause_simulation()

    # Stopping the simulation resets the environment
    rob.stop_world()
    return fitness
 def __init__(self):
     signal.signal(signal.SIGINT, self.terminate_program)
     self.rob = robobo.SimulationRobobo(number='#0').connect(address='127.0.0.1', port=19997)
     self.rob.play_simulation()
def main():
    signal.signal(signal.SIGINT, terminate_program)

    # rob = robobo.HardwareRobobo(camera=True).connect(address="192.168.1.7")
    rob = robobo.SimulationRobobo(number='').connect(address='127.0.0.1',
                                                     port=19997)
    rob.play_simulation()

    rob.set_phone_tilt(0.8, 50)
    image = rob.get_image_front()

    # mask = cv2.inRange(image, (0, 140, 0), (20, 255, 20))
    mask = cv2.inRange(image, (0, 0, 80), (60, 60, 255))
    # mask = cv2.bitwise_not(mask, mask)
    # masked = cv2.bitwise_and(image, image, mask=mask)
    cv2.imshow('9', mask)
    cv2.imshow('1', image)
    cv2.waitKey(0)  # waits until a key is pressed
    cv2.destroyAllWindows()
    exit()
    # prey_robot = robobo.SimulationRoboboPrey().connect(address='192.168.1.71', port=19989)
    # prey_controller = prey.Prey(robot=prey_robot, level=2, hardware=True)
    prey_robot = robobo.SimulationRoboboPrey('#0').connect(address='127.0.0.1',
                                                           port=19989)
    prey_controller = prey.Prey(robot=prey_robot, level=2)

    prey_controller.start()
    for i in range(5000):
        print("robobo is at {}".format(rob.position()))
        rob.move(90, 90, millis=200)
    prey_controller.stop()
    prey_controller.join()
    prey_robot.disconnect()

    rob.stop_world()
    # move and talk
    # rob.set_emotion('sad')

    # initialise class prey
    # needs to receive the robot to move
    # there are 5 levels of difficulty, from 0 (super easy) to 4 (hard)
    # you can select the one you want using a parameter in the following constructor; the default is 2
    prey_controller = prey.Prey(robot=prey_robot, level=2)
    # start the thread prey
    # makes the prey move
    prey_controller.start()
    # print("start")

    for i in range(10):
        print("robobo is at {}".format(rob.position()))
        rob.move(5, 5, 2000)
    #
    # print("robobo is at {}".format(rob.position()))
    # rob.sleep(1)

    # # Following code moves the phone stand
    # rob.set_phone_pan(343, 100)
    # rob.set_phone_tilt(109, 100)
    # time.sleep(1)
    # rob.set_phone_pan(11, 100)
    # rob.set_phone_tilt(26, 100)

    # rob.talk('Hi, my name is Robobo')
    # rob.sleep(1)
    # rob.set_emotion('happy')

    # Following code gets an image from the camera
    # image = rob.get_image_front()
    # cv2.imwrite("test_pictures.png",image)

    # time.sleep(0.1)

    # # IR reading
    # for i in range(1000000):
    #     print("ROB Irs: {}".format(np.log(np.array(rob.read_irs()))/10))
    #     time.sleep(0.1)

    # stop the prey
    # if you want to stop the prey you have to use the two following commands
    prey_controller.stop()
    prey_controller.join()
    prey_robot.disconnect()

    # pause the simulation and read the collected food
    # rob.pause_simulation()
    # print("Robobo collected {} food".format(rob.collected_food()))

    # Stopping the simulation resets the environment
    rob.stop_world()

    time.sleep(10)
    # rob.kill_connections()
    # rob = robobo.SimulationRobobo().connect(address='192.168.1.6', port=19997)
    rob.play_simulation()

    rob.set_phone_tilt(0.8, 50)
    # prey_robot = robobo.SimulationRoboboPrey().connect(address='192.168.1.71', port=19989)
    # prey_controller = prey.Prey(robot=prey_robot, level=2, hardware=True)
    prey_robot = robobo.SimulationRoboboPrey().connect(address='192.168.1.6',
                                                       port=19989)
    prey_controller = prey.Prey(robot=prey_robot, level=2)

    prey_controller.start()
    for i in range(10):
        print("robobo is at {}".format(rob.position()))
        rob.move(90, 90, millis=200)
    prey_controller.stop()
    prey_controller.join()

    rob.stop_world()
NUMBER_OF_HIDDEN_Neurons_2 = 2
NUMBER_OF_WEIGHTS = (NUMBER_OF_INPUTS + 1) * NUMBER_OF_HIDDEN_Neurons_1+ (NUMBER_OF_HIDDEN_Neurons_1 + 1) \
                    * NUMBER_OF_HIDDEN_Neurons_2 +  (NUMBER_OF_HIDDEN_Neurons_2 + 1) * NUMBER_OF_OUTPUTS
NUMBER_OF_SIGMAS = NUMBER_OF_WEIGHTS

MU = 10  # how many parents per generation
LAMBDA = 30  # how many children per generation
LOWER = NUMBER_OF_WEIGHTS * [-1]
UPPER = NUMBER_OF_WEIGHTS * [1]

TIME_OUT = 45

with open('IP.txt', 'r') as f:
    ip = f.readlines()[0]

rob = robobo.SimulationRobobo(number='').connect(address=ip, port=19997)
rob2 = robobo.SimulationRobobo(number='#0').connect(address=ip,
                                                    port=19998,
                                                    prey=True)


def fitness(c, weights, prey=False):
    rob.set_phone_tilt(0.8, 50)
    if not prey:
        controller = c(weights, 7, 2, 2)
        prey_controller = c(best_prey, 8, 2, 2)
    else:
        controller = c(best_pred, 7, 2, 2)
        prey_controller = c(weights, 8, 2, 2)
    while (rob.is_simulation_running()):
        pass
Example #15
def main():
    signal.signal(signal.SIGINT, terminate_program)

    # rob = robobo.HardwareRobobo(camera=True).connect(address="192.168.1.7")
    rob = robobo.SimulationRobobo().connect(address='192.168.1.6', port=19997)

    rob.play_simulation()

    # ALWAYS connect first to the real robot, then start the simulation and only then connect to the prey
    # if the order is not respected, an error is raised and I do not know why
    # if you use the provided scene, do not change the port number
    # if you want to build your own scene, remember to modify the prey port number on vrep
    prey_robot = robobo.SimulationRoboboPrey().connect(address='192.168.1.6', port=19989)
    #
    # rob.pause_simulation()

    # move and talk
    # rob.set_emotion('sad')

    # initialise class prey
    # needs to receive the robot to move
    # there are 5 levels of difficulty, from 0 (super easy) to 4 (hard)
    # you can select the one you want using a parameter in the following constructor; the default is 2
    prey_controller = prey.Prey(robot=prey_robot, level=2)
    # start the thread prey
    # makes the prey move
    prey_controller.start()
    # print("start")

    for i in range(10):
        print("robobo is at {}".format(rob.position()))
        rob.move(5, 5, 2000)
    #
    # print("robobo is at {}".format(rob.position()))
    # rob.sleep(1)

    # # Following code moves the phone stand
    # rob.set_phone_pan(343, 100)
    # rob.set_phone_tilt(109, 100)
    # time.sleep(1)
    # rob.set_phone_pan(11, 100)
    # rob.set_phone_tilt(26, 100)

    # rob.talk('Hi, my name is Robobo')
    # rob.sleep(1)
    # rob.set_emotion('happy')

    # Following code gets an image from the camera
    # image = rob.get_image_front()
    # cv2.imwrite("test_pictures.png",image)

    # time.sleep(0.1)

    # # IR reading
    # for i in range(1000000):
    #     print("ROB Irs: {}".format(np.log(np.array(rob.read_irs()))/10))
    #     time.sleep(0.1)

    # stop the prey
    # if you want to stop the prey you have to use the two following commands
    prey_controller.stop()
    prey_controller.join()
    prey_robot.disconnect()


    # pause the simulation and read the collected food
    # rob.pause_simulation()
    # print("Robobo collected {} food".format(rob.collected_food()))

    # Stopping the simulation resets the environment
    rob.stop_world()

    time.sleep(10)
    # rob.kill_connections()
    # rob = robobo.SimulationRobobo().connect(address='192.168.1.6', port=19997)
    rob.play_simulation()
    # prey_robot = robobo.SimulationRoboboPrey().connect(address='192.168.1.71', port=19989)
    # prey_controller = prey.Prey(robot=prey_robot, level=2, hardware=True)
    prey_robot = robobo.SimulationRoboboPrey().connect(address='192.168.1.6', port=19989)
    prey_controller = prey.Prey(robot=prey_robot, level=2)

    prey_controller.start()
    for i in range(10):
        print("robobo is at {}".format(rob.position()))
        rob.move(5, 5, 2000)
    prey_controller.stop()
    prey_controller.join()

    rob.stop_world()
Example #16
 def __init__(self):
     signal.signal(signal.SIGINT, self.terminate_program)
     self.rob = robobo.SimulationRobobo(number='').connect(address='127.0.0.1', port=19999)  # CHECK
     self.time_since_obj = [0, 0]
Example #17
 def __init__(self):
     signal.signal(signal.SIGINT, self.terminate_program)
     self.rob = robobo.SimulationRobobo().connect(address='127.0.0.1',
                                                  port=19997)
     self.time_since_obj_L = 0
     self.time_since_obj_R = 0
Example #18
def main():
    signal.signal(signal.SIGINT, terminate_program)

    rob = robobo.SimulationRobobo().connect(address='172.17.0.1', port=19997) if use_simulation \
        else robobo.HardwareRobobo(camera=True).connect(address="10.15.3.48")

    rob.set_phone_tilt(45, 100) if use_simulation else rob.set_phone_tilt(
        109, 100)

    state_table = {}
    q_table_file = './src/state_table.json'
    if os.path.exists(q_table_file):
        with open(q_table_file) as g:
            state_table = json.load(g)

    def get_sensor_info(direction):
        a = np.log(np.array(rob.read_irs())) / 10
        all_sensor_info = np.array([0 if x == inf else 1 + (-x / 2) - 0.2 for x in a]) if use_simulation \
            else np.array(np.log(rob.read_irs())) / 10
        all_sensor_info[all_sensor_info == inf] = 0
        all_sensor_info[all_sensor_info == -inf] = 0
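        # readings are log-scaled; in simulation they are additionally inverted and
        # offset so that larger values correspond to closer obstacles, and inf
        # (nothing detected) is mapped to 0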
        # [0, 1, 2, 3, 4, 5, 6, 7]
        if direction == 'front':
            return all_sensor_info[5]
        elif direction == 'back':
            return all_sensor_info[1]
        elif direction == 'front_left':
            return all_sensor_info[6]
        elif direction == 'front_left_left':
            return all_sensor_info[7]
        elif direction == 'front_right':
            return all_sensor_info[4]
        elif direction == 'front_right_right':
            return all_sensor_info[3]
        elif direction == 'back_left':
            return all_sensor_info[0]
        elif direction == 'back_right':
            return all_sensor_info[2]
        elif direction == 'all':
            print(all_sensor_info[3:])
            return all_sensor_info
        elif direction == 'front_3':
            return [all_sensor_info[3]] + [all_sensor_info[5]] + [all_sensor_info[7]]
        else:
            raise Exception('Invalid direction')

    # safe, almost safe, not safe. combine with previous state of safe almost safe and not safe.
    # safe to almost safe is good, almost safe to safe is okay, safe to safe is neutral
    # s to a to r to s'.
    # Small steps for going left or right (left or right are only rotating and straight is going forward).
    # controller is the q values: the boundary for every sensor.

    def move_left():
        rob.move(-speed, speed, dist)

    def move_right():
        rob.move(speed, -speed, dist)

    def go_straight():
        rob.move(speed, speed, dist)

    def move_back():
        rob.move(-speed, -speed, dist)

    boundary_sensor = [0.6, 0.8] if not use_simulation else [0.5, 0.95]
    boundaries_color = [0.1, 0.7] if not use_simulation else [0.05, 0.85]

    # A static collision-avoidance policy
    def static_policy(color_info):
        max_c = np.max(color_info)
        if max_c == color_info[0]:
            return 1
        elif max_c == color_info[1]:
            return 0
        elif max_c == color_info[2]:
            return 2
        return 0
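    # static_policy steers toward the image third with the most red pixels:
    # color_info is (left, middle, right) and actions are 0=straight, 1=left, 2=right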

    def epsilon_policy(s, epsilon):
        s = str(s)
        # epsilon greedy
        """"
      ACTIONS ARE DEFINED AS FOLLOWS:
        NUM: ACTION
          ------------
          0: STRAIGHT
          1: LEFT
          2: RIGHT
          ------------
      """
        e = 0 if run_test else epsilon
        if e > random.random():
            return random.choice([0, 1, 2])
        else:
            return np.argmax(state_table[s])

    def take_action(action):
        if action == 1:
            move_left()
        elif action == 2:
            move_right()
        elif action == 0:
            go_straight()
        # elif action == 'back':
        #     move_back()

    def get_color_info():
        image = rob.get_image_front()

        # Mask function
        def get_red_pixels(img):
            hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
            lower_range = np.array([0, 50, 20])
            upper_range = np.array([5, 255, 255])
            mask = cv2.inRange(hsv, lower_range, upper_range)
            # print(get_green_pixels(image))
            cv2.imwrite('a.png', mask)
            a = b = 0
            for i in mask:
                for j in i:
                    b += 1
                    if j == 255:
                        a += 1
            return a / b
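            # the mask is 255 for pixels inside the red HSV range, so a / b is the
            # fraction of red pixels in this image region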
            # count = 0
            # pix_count = 0
            # b = 64
            # for i in range(len(img)):
            #     for j in range(len(img[i])):
            #         pixel = img[i][j]
            #         pix_count += 1
            #         if (pixel[0] > b or pixel[2] > b) and pixel[1] < b * 2 \
            #                 or (pixel[0] > b*2 and pixel[1] > b*2 and pixel[2] > b*2):
            #             # img[i][j] = [0, 0, 0]
            #             count += 1
            # return 1 - (count / pix_count)

        left, middle_l, middle_r, right = np.hsplit(image, 4)
        middle = np.concatenate((middle_l, middle_r), axis=1)
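        # the frame is split into four vertical strips: the outer quarters act as
        # 'left' and 'right', the two middle quarters are merged into 'middle'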
        return get_red_pixels(left), get_red_pixels(middle), get_red_pixels(
            right)

    def get_reward(previous_state, new_state, previous_sensor, new_sensor,
                   prev_action, action, prev_val, new_val):

        # Max amount of red in img, before and after
        # 0: no red pixels in img;    1: all of img consists of red pixels

        prev_left, prev_mid, prev_right = prev_val
        sum_prev_val = sum(prev_val)
        new_left, new_mid, new_right = new_val
        sum_new_val = sum(new_val)
        max_new_sensor = np.max(new_sensor)
        max_prev_sensor = np.max(previous_sensor)
        max_c_prev = np.max(previous_state[3:])
        max_c_new = np.max(new_state[3:])

        # Encourages going towards prey
        if max_c_prev == 0 and max_c_new == 1:
            return 10 if action == 0 else 2  # This was originally 0 when left/right

        # Massive payoff if we get super close to prey
        if max_c_prev == 1 and max_c_new == 2:
            return 30
        if max_c_prev == 2 and max_c_new == 2 \
                and max_prev_sensor == 1:
            return 20

        # Nothing happens if prey gets away
        if max_c_prev == 2 and max_c_new == 1:
            return -2  # This was originally 0

        # Give good reward if we see more red than before
        if sum_prev_val < sum_new_val:
            # return 10 if action == 0 else 0
            return 8 if action == 0 else 1

        # If sensors detect enemy, then give good payoff.
        # If sensors detect wall, give bad payoff to steer clear
        if max_new_sensor > max_prev_sensor:
            return 30 if max_c_new >= 1 else -5

        # Give good payoff to encourage exploring (going straight)
        # Minor bad payoff for turning around, but not bad enough to discourage it
        # return 1 if action == 0 else -1
        return 0 if action == 0 else -2

    # Returns list of values with discretized sensor values and color values.
    def make_discrete(values_s, boundary_s, values_c, boundaries_c):
        discrete_list_s = []
        discrete_list_c = []

        for x in values_s:
            if boundary_s[0] > x:
                discrete_list_s.append(0)
            elif boundary_s[1] > x > boundary_s[0]:
                discrete_list_s.append(1)
            else:
                discrete_list_s.append(2)
        for y in values_c:
            if y < boundaries_c[0]:
                discrete_list_c.append(0)
            elif boundaries_c[0] < y < boundaries_c[1]:
                discrete_list_c.append(1)
            else:
                discrete_list_c.append(2)
        print('real c_values: ', values_c)
        return discrete_list_s + discrete_list_c

    """
  REINFORCEMENT LEARNING PROCESS
  INPUT:  alpha    : learning rate
          gamma    : discount factor
          epsilon  : epsilon value for e-greedy
          episodes : no. of episodes
          act_lim  : no. of actions robot takes before ending episode
          qL       : True if you use Q-Learning
  """
    stat_fitness = list()
    stat_rewards = [0]

    def normalize(reward, old_min, old_max, new_min=-1, new_max=1):
        return ((reward - old_min) /
                (old_max - old_min)) * (new_max - new_min) + new_min
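        # plain min-max rescaling from [old_min, old_max] onto [new_min, new_max]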

    # def run_static(lim, no_blocks=0):
    #     for i in range(lim):
    #         if use_simulation:
    #             rob.play_simulation()
    #
    #         a, b, c = get_color_info()
    #         current_color_info = a, b, c
    #         current_sensor_info = get_sensor_info('front_3')
    #
    #         current_state = make_discrete(get_sensor_info('front_3'), boundary_sensor, current_color_info,
    #                                       boundaries_color)
    #
    #         if str(current_state) not in state_table.keys():
    #             state_table[str(current_state)] = [0 for _ in range(3)]
    #
    #         a, b, c = get_color_info()
    #         new_color_info = a, b, c
    #         # print(a, b, c, new_color_info)
    #
    #         action = static_policy(new_color_info)
    #
    #         take_action(action)
    #
    #         new_state = make_discrete(get_sensor_info('front_3'), boundary_sensor, new_color_info,
    #                                   boundaries_color)
    #         # TODO: make sure that current color info gets initialized the first time.
    #         r = get_reward(current_state, new_state, action, current_color_info, new_color_info, no_blocks)
    #         if r == 20:
    #             no_blocks += 1
    #
    #         norm_r = normalize(r, -30, 20)
    #
    #         if i != 0:
    #             stat_fitness.append(stat_fitness[-1] + (no_blocks / i))
    #         else:
    #             stat_fitness.append(float(0))
    #         print(fitness)
    #         if stat_rewards:
    #             stat_rewards.append(stat_rewards[-1] + norm_r)
    #         else:
    #             rewards.append(norm_r)
    #
    #         current_state = new_state
    #         current_color_info = new_color_info

    def rl(alpha,
           gamma,
           epsilon,
           episodes,
           act_lim,
           param_tuples,
           qL=False,
           no_blocks=0):

        fitness = list()
        rewards = [0]

        for i in range(episodes):
            print('Episode ' + str(i))
            terminate = False

            if use_simulation:
                rob.play_simulation()
                prey_robot = robobo.SimulationRoboboPrey().connect(
                    address='172.17.0.1', port=19989)
            else:
                prey_robot = robobo.HardwareRobobo(camera=True).connect(
                    address="10.15.3.48")  # Connect to different ip?
            prey_controller = prey.Prey(robot=prey_robot, level=2)
            prey_controller.start()  # not sure yet if this is needed for both simulation and hardware

            current_color_space = get_color_info()
            current_sensor_info = get_sensor_info('front_3')
            current_state = make_discrete(current_sensor_info, boundary_sensor,
                                          current_color_space,
                                          boundaries_color)

            if str(current_state) not in state_table.keys():
                state_table[str(current_state)] = [0 for _ in range(3)]

            action = epsilon_policy(current_state, epsilon)
            # current_collected_food = rob.collected_food() if use_simulation else 0
            # initialise state if it doesn't exist, else retrieve the current q-value
            x = 0
            while not terminate:

                take_action(action)
                # new_collected_food = rob.collected_food() if use_simulation else 0

                # Whole img extracted to get reward value
                # left, mid, right extracted to save state space accordingly

                new_color_space = get_color_info()
                new_sensor_info = get_sensor_info('front_3')
                new_state = make_discrete(new_sensor_info, boundary_sensor,
                                          new_color_space, boundaries_color)

                if str(new_state) not in state_table.keys():
                    state_table[str(new_state)] = [0 for _ in range(3)]

                new_action = epsilon_policy(new_state, epsilon)

                # Retrieve the max action if we use Q-Learning
                max_action = np.argmax(
                    state_table[str(new_state)]) if qL else new_action

                # Get reward
                r = get_reward(current_state, new_state, current_sensor_info,
                               new_sensor_info, action, new_action,
                               current_color_space, new_color_space)
                print("State and obtained Reward: ", new_state, r)

                # norm_r = normalize(r, -30, 20)
                #
                # if i != 0:
                #     fitness.append(no_blocks / i)
                # else:
                #     fitness.append(float(0))
                # # print(fitness)
                # if rewards:
                #     rewards.append(rewards[-1] + norm_r)
                # else:
                #     rewards.append(norm_r)

                # Update rule
                if not run_test:
                    # print('update')
                    state_table[str(current_state)][action] += \
                        alpha * (r + (gamma *
                                      np.array(
                                          state_table[str(new_state)][max_action]))
                                 - np.array(state_table[str(current_state)][action]))

                # Stop episode if we get very close to an obstacle
                if (max(new_state[:3]) == 2 and max(new_state[3:]) != 2
                        and use_simulation) or x == act_lim - 1:
                    state_table[str(new_state)][new_action] = -10
                    terminate = True
                    print("done")
                    if not run_test:
                        print('writing json')
                        with open(q_table_file, 'w') as json_file:
                            json.dump(state_table, json_file)

                    if use_simulation:
                        print("stopping the simulation")
                        prey_controller.stop()
                        prey_controller.join()
                        prey_robot.disconnect()
                        rob.stop_world()
                        while not rob.is_sim_stopped():
                            print("waiting for the simulation to stop")
                        time.sleep(2)

                # update current state and action
                current_state = new_state
                current_sensor_info = new_sensor_info
                action = new_action
                current_color_space = new_color_space

                # increment action limit counter
                x += 1

        return fitness, rewards

    experiments = 2

    epsilons = [0.01, 0.08, 0.22]
    gammas = [0.9]
    param_tuples = [(epsilon, gamma) for epsilon in epsilons
                    for gamma in gammas]
    _, _ = rl(0.9, 0, 0, 10, 200, [()],
              qL=True)  # alpha, gamma, epsilon, episodes, actions per episode
def main():
    signal.signal(signal.SIGINT, terminate_program)

    rob = robobo.SimulationRobobo("#0").connect(address='10.15.3.49', port=19997) if use_simulation \
        else robobo.HardwareRobobo(camera=True).connect(address="192.168.43.187")

    def get_sensor_info(direction):
        a = np.log(np.array(rob.read_irs())) / 10
        all_sensor_info = np.array([0 if x == inf else 1 + (-x / 2) - 0.2 for x in a]) if use_simulation \
            else np.array(np.log(rob.read_irs())) / 10
        all_sensor_info[all_sensor_info == inf] = 0
        all_sensor_info[all_sensor_info == -inf] = 0
        # [0, 1, 2, 3, 4, 5, 6, 7]
        if direction == 'front':
            return all_sensor_info[5]
        elif direction == 'back':
            return all_sensor_info[1]
        elif direction == 'front_left':
            return np.max(all_sensor_info[[6, 7]])
        elif direction == 'front_right':
            return np.max(all_sensor_info[[3, 4]])
        elif direction == 'back_left':
            return all_sensor_info[0]
        elif direction == 'back_right':
            return all_sensor_info[2]
        elif direction == 'all':
            return all_sensor_info
        else:
            raise Exception('Invalid direction')

    # safe, almost safe, not safe. combine with previous state of safe almost safe and not safe.
    # safe to almost safe is good, almost safe to safe is okay, safe to safe is neutral
    # s to a to r to s'.
    # Small steps for going left or right (left or right are only rotating and straight is going forward).
    # controller is the q values: the boundary for every sensor.

    def move_left():
        rob.move(-speed, speed, dist)

    def move_right():
        rob.move(speed, -speed, dist)

    def go_straight():
        rob.move(speed, speed, dist)

    def move_back():
        rob.move(-speed, -speed, dist)

    boundary = [0.5, 0.8] if not use_simulation else [0.75, 0.95]

    # A static collision-avoidance policy
    def static_policy(s):
        if get_sensor_info('front_left') >= s \
                and get_sensor_info('front_left') > get_sensor_info('front_right'):
            take_action('right')

        elif get_sensor_info('front_right') >= s \
                and get_sensor_info('front_right') > get_sensor_info('front_left'):
            take_action('left')
        else:
            take_action('straight')

    state_table = {}
    if os.path.exists('./src/state_table.json'):
        with open('./src/state_table.json') as f:
            state_table = json.load(f)

    def epsilon_policy(s, epsilon):
        s = str(s)
        # epsilon greedy
        """"
        ACTIONS ARE DEFINED AS FOLLOWS:
          NUM: ACTION
            ------------
            0: STRAIGHT
            1: LEFT
            2: RIGHT
            ------------
        """
        e = 0 if run_test else epsilon
        if e > random.random():
            return random.choice([0, 1, 2])
        else:
            return np.argmax(state_table[s])

    def take_action(action):
        if action == 1:
            move_left()
        elif action == 2:
            move_right()
        elif action == 0:
            go_straight()
        # elif action == 'back':
        #     move_back()

    def get_reward(current, new, action):
        if current == 0 and new == 0:
            return 0 if action == 0 else -1
        elif current == 0 and new == 1:
            return 1
        elif current == 0 and new == 2:
            return -10
        elif current == 1 and new == 0:
            return 1
        elif current == 1 and new == 1:
            return 1 if action == 0 else 0
        elif current == 1 and new == 2:
            return -10
        return 0
        # TODO give negative reward for repetitions

    def make_discrete(values, boundaries):
        discrete_list = []
        for x in values:
            if x > boundaries[1]:
                discrete_list.append(2)
            elif boundaries[1] > x > boundaries[0]:
                discrete_list.append(1)
            elif boundaries[0] > x:
                discrete_list.append(0)
        return discrete_list
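    # make_discrete maps each sensor value to 0 (safe), 1 (almost safe) or 2 (not safe)
    # using the two boundary thresholds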

    """
    REINFORCEMENT LEARNING PROCESS
    INPUT:  alpha    : learning rate
            gamma    : discount factor
            epsilon  : epsilon value for e-greedy
            episodes : no. of episodes
            act_lim  : no. of actions robot takes before ending episode
            qL       : True if you use Q-Learning
    """

    def rl(alpha, gamma, epsilon, episodes, act_lim, qL=False):
        for i in range(episodes):
            print('Episode ' + str(i))
            terminate = False
            if use_simulation:
                rob.play_simulation()

            current_state = make_discrete(get_sensor_info('all')[3:], boundary)

            if str(current_state) not in state_table.keys():
                state_table[str(current_state)] = [0 for _ in range(3)]

            action = epsilon_policy(current_state, epsilon)
            # initialise state if it doesn't exist, else retrieve the current q-value
            x = 0
            while not terminate:
                take_action(action)
                new_state = make_discrete(get_sensor_info('all')[3:], boundary)

                if str(new_state) not in state_table.keys():
                    state_table[str(new_state)] = [0 for _ in range(3)]

                new_action = epsilon_policy(new_state, epsilon)

                # Retrieve the max action if we use Q-Learning
                max_action = np.argmax(
                    state_table[str(new_state)]) if qL else new_action

                # Get reward
                r = get_reward(np.max(current_state), np.max(new_state),
                               action)

                normalized_r = ((r - -10) / (2 - -10)) * (1 - -1) + -1
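                # min-max rescale of the reward from the range [-10, 2] into [-1, 1]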
                fitness.append(normalized_r *
                               np.max(get_sensor_info("all")[3:]))
                # print(fitness)
                if rewards:
                    rewards.append(rewards[-1] + normalized_r)
                else:
                    rewards.append(normalized_r)

                # Update rule
                print("r: ", r)

                if not run_test:
                    print('update')
                    state_table[str(current_state)][action] += \
                        alpha * (r + (gamma *
                                      np.array(
                                          state_table[str(new_state)][max_action]))
                                 - np.array(state_table[str(current_state)][action]))

                # Stop episode if we get very close to an obstacle
                if max(new_state) == 2 or x == act_lim - 1:
                    state_table[str(new_state)][new_action] = -10
                    terminate = True
                    print("done")
                    if not run_test:
                        print('writing json')
                        with open('./src/state_table.json', 'w') as json_file:
                            json.dump(state_table, json_file)

                    if use_simulation:
                        print("stopping the simulation")
                        rob.stop_world()
                        while not rob.is_sim_stopped():
                            print("waiting for the simulation to stop")
                        time.sleep(2)

                # update current state and action
                current_state = new_state
                action = new_action

                # increment action limit counter
                x += 1

    # alpha, gamma, epsilon, episodes, actions per episode
    rl(0.9, 0.9, 0.05, 30, 500, qL=True)

    pprint(state_table)

    if not run_test:
        all_rewards = []
        all_fits = []
        if os.path.exists('./src/rewards.csv'):
            with open('./src/rewards.csv', 'rb') as f:  # pickle needs a binary file handle
                all_rewards = pickle.load(f)

        if os.path.exists('./src/fitness.csv'):
            with open('./src/fitness.csv', 'rb') as f:
                all_fits = pickle.load(f)

        all_rewards += rewards
        all_fits += fitness

        print(all_rewards)
        print(all_fits)

        with open('./src/rewards.csv', 'wb') as f:  # pickle needs a binary file handle
            pickle.dump(all_rewards, f)

        with open('./src/fitness.csv', 'wb') as f:
            pickle.dump(all_fits, f)

        plt.figure('Rewards')
        plt.plot(all_rewards)
        plt.savefig("./src/plot_reward.png")
        plt.show()

        plt.figure('Fitness Values')
        plt.plot(all_fits)
        plt.savefig("./src/plot_fitness.png")
def main():

    global crashed

    EP_COUNT = 100
    STEP_COUNT = 50
    STEP_SIZE_MS = 500
    CRASH_SENSOR_BOUNDARY = -0.43  # negative for simulation, positive for RW. Not used right now
    CRASH_POSITION_BOUNDARY = 0.02

    A = [(20, 20), (0, 20), (20, 0)]  # commented out: (-25, -25), (10, 5), (5, 10)
    epsilon = 0.1
    epsilon_decaying = 0.5
    gamma = 0.1
    recency_factor = 0.01  # higher parameter setting -> forget older information faster
    # proximity_factor = 1  # how heavily to penalize proximity to obstacle

    ACTION_NAMES = ['forward', 'sharp left', 'sharp right']  # commented out: 'backward', 'left', 'right'
    SENS_NAMES = ["IR" + str(i + 1) for i in range(8)]

    # Initialize the robot -> SET THESE PARAMETERS!
    hardware = False
    # nn_from_file = True if input('Would you like to use a pre-trained neural network? (y/n') == 'y' else False
    nn_from_file = True
    learning = False  # Disable this if you want to run a pre-trained network
    if nn_from_file is True:
        print('loading network, disabling learning...')
        learning = False

    if hardware:
        rob = robobo.HardwareRobobo(camera=True).connect(address="192.168.1.7")
    else:
        rob = robobo.SimulationRobobo().connect(address='172.20.10.3', port=19997)
        rob.stop_world()
        time.sleep(0.1)

    if not learning:
        epsilon = 0
        epsilon_decaying = 0

    # Initialize the data structure and neural network
    eps = []
    hidden_layers = [16, 12]
    model = init_nn(input_dims=len(SENS_NAMES), output_dims=len(A),
                    hidden_layers=hidden_layers, from_file=nn_from_file)

    for episode in range(EP_COUNT):

        data = EpisodeData(ACTION_NAMES, sens_names=SENS_NAMES)

        signal.signal(signal.SIGINT, terminate_program)
        start_simulation(rob)

        ### INITIALIZATION ###
        print('\n--- episode {} ---'.format(episode + 1))

        S = np.log(np.array(rob.read_irs()))
        S[np.isinf(S)] = 0
        last_position = np.array([0,0,0])

        ########## Q-LEARNING LOOP ##########

        crashed = False

        for i in range(STEP_COUNT):
            start_time = time.time()

            print('\n--- step {} ---'.format(i+1))

            # pos = rob.position()

            ### ACTION SELECTION & EXECUTION ###
            Q_s = model.predict(np.expand_dims(S, 0))[0]

            # request wheel speed parameters for max action
            action, wheels = e_greedy_action(Q_s, A, epsilon)  # alternatively: epsilon_decaying ** (1 + (0.1 * episode))

            # move the robot
            rob.move(wheels['left'], wheels['right'], STEP_SIZE_MS)

            if learning:
                time.sleep(STEP_SIZE_MS/1000)

            ### OBSERVING NEXT STATE ###
            S_prime = np.log(np.array(rob.read_irs()))
            S_prime[np.isinf(S_prime)] = 0

            print("ROB IRs: {}".format(S_prime / 10))
            current_position = np.array(rob.position())
            print("robobo is at {}".format(current_position))

            # observe the reward
            s_trans = wheels['left'] + wheels['right']  # translational speed of the robot
            s_rot = abs(wheels['left'] - wheels['right'])  # rotational speed of the robot

            if hardware:
                crashed = False
                raise NotImplementedError("Haven't implemented this, I suggest using a threshold for the sensor (see "
                                          "code below this statement)")
            else:
                dist = np.linalg.norm(last_position - current_position)
                crashed = min(S_prime[3:] / 10) < CRASH_SENSOR_BOUNDARY or dist < CRASH_POSITION_BOUNDARY
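                # in simulation a crash is assumed when a front sensor reading drops below
                # the threshold or the robot has barely moved since the previous step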

            if not crashed:
                # reward = 1 + min(S) * proximity_factor  # - (wheels == A[1])
                # see Eiben et al. for this formula
                reward = s_trans * (1 - 0.9 * (s_rot / 20)) * (1 - (min(S_prime[3:]) / -0.65))
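                # rewards translational speed while penalizing rotation and proximity
                # to obstacles, as in the Eiben et al. obstacle-avoidance fitness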
            else:
                reward = -400

            # Retrieve Q values from neural network
            Q_prime = model.predict(np.expand_dims(S_prime, 0))[0]

            ### LEARNING ###

            Q_target = reward + (gamma * np.max(Q_prime))  # the Q-learning target takes the max Q-value; argmax would return an index

            Q_targets = np.copy(Q_s)
            Q_targets[action] = Q_target

            ### SAVE DATA ###

            # pos = np.array([1,2,3])
            data.update(i, S, Q_s, Q_targets, reward)  # pos removed

            ### TERMINATION CONDITION ###

            # if S == S_prime and not S.sum() == 0:  # np.isinf(S).any() is False:
            #     print('Termination condition reached')
            #     break

            S = np.copy(S_prime)
            last_position = current_position

            print("crashed: ", crashed)
            print("chosen action:", ACTION_NAMES[action])
            print('reward: ', reward)
            print("Q_s (NN output): ", Q_s)
            print("Updated Q-values: " + str(Q_targets))

            elapsed_time = time.time() - start_time

            if crashed:
                break

        # terminate the episode data and store it
        data.terminate()
        eps.append(data)

        if learning:

            print("\n----- Learning ----\n")
            X = pd.concat([ep_data.sens for ep_data in eps])
            y = pd.concat([ep_data.Q_targets for ep_data in eps])

            # # calculate sample weights
            # ep_lengths = [len(ep_data.sens) for ep_data in eps[::-1]]
            # sample_weights = []
            #
            # for i, ep_length in enumerate(ep_lengths):
            #     sample_weights = sample_weights + ([(1 - recency_factor) ** i] * ep_length)

            # perform learning over the episode
            model.fit(X, y, epochs=100) #sample_weight=np.array(sample_weights))

        # # perform an evaluation of the episode (probably not necessary till later)
        # model.evaluate(data)

        # time.sleep(0.1)

        # pause the simulation and read the collected food
        # rob.pause_simulation()

        rob.sleep(1)

        if crashed or episode == EP_COUNT - 1:  # range() makes the last episode EP_COUNT - 1
            save_nn(model)
            print('Robot crashed, resetting simulation...')
            data.terminate(crashed)
            # could implement something here to save the experience if resetting the simulation!

        if crashed:
            rob.stop_world()

    rob.stop_world()
Example #21
NUMBER_OF_OUTPUTS = 2
NUMBER_OF_HIDDEN_Neurons_1 = 2
NUMBER_OF_HIDDEN_Neurons_2 = 2
NUMBER_OF_WEIGHTS = (NUMBER_OF_INPUTS + 1) * NUMBER_OF_HIDDEN_Neurons_1+ (NUMBER_OF_HIDDEN_Neurons_1 + 1) \
                    * NUMBER_OF_HIDDEN_Neurons_2 +  (NUMBER_OF_HIDDEN_Neurons_2 + 1) * NUMBER_OF_OUTPUTS
NUMBER_OF_SIGMAS = NUMBER_OF_WEIGHTS

MU = 10  # how many parents per generation
LAMBDA = 30  # how many children per generation
LOWER = NUMBER_OF_WEIGHTS * [-1]
UPPER = NUMBER_OF_WEIGHTS * [1]

with open('IP.txt', 'r') as f:
    ip = f.readlines()[0]

rob = robobo.SimulationRobobo(number='').connect(address=ip, port=19997)


def fitness(c, weights):
    rob.set_phone_tilt(0.8, 50)
    fitness_score = 0
    controller = c(weights, 7, 2, 2)
    fitness = []
    for i in range(5):
        while (rob.is_simulation_running()):
            pass
        rob.play_simulation()
        start = rob.get_sim_time()
        while (rob.get_sim_time() - start < 60 * 1000):
            sensors = rob.read_irs()[3:]  # front sensors, length 5
            sensors = [