Example #1
    # 28x28 test image with a vertical edge: the right half is set to 1.0
    vertical_edge_mat = np.zeros([28, 28])
    vertical_edge_mat[:, 14:] = 1.0
    recorder = Recorder(n=6)
    # 10x10 upper-bidiagonal 0/1 matrix (ones on the main and first super-diagonal);
    # built here but not used further in this snippet
    debu2el = np.diag(np.ones([10 - 1]), k=1) + np.eye(10)
    # debu2el = debu2el[:-1,:]

    scene = syc.Scene(image_matrix=vertical_edge_mat)
    sensor = syc.Sensor()
    # bound the agent's position so the sensor window stays inside the scene
    agent = syc.Agent(max_q=[scene.maxx - sensor.hp.winx, scene.maxy - sensor.hp.winy])
    reward = syc.Rewards()
    RL = DeepQNetwork(len(agent.hp.action_space), sensor.hp.winx + 2,  # sensor.frame_size+2,
                      reward_decay=0.9,
                      e_greedy=0.99,
                      e_greedy0=0.25,
                      replace_target_iter=10,
                      memory_size=30000,
                      e_greedy_increment=0.001,
                      state_table=None
                      )


    # collect the sub-module hyper-parameters under the shared hp record
    hp.scene = scene.hp
    hp.sensor = sensor.hp
    hp.agent = agent.hp
    hp.reward = reward.hp
    hp.RL = RL.hp

    run_env()

Example #2
    scene = syc.Scene(frame_list=images)
    sensor = syc.Sensor(log_mode=False, log_floor=1.0)
    agent = syc.Agent(max_q=[scene.maxx - sensor.hp.winx, scene.maxy - sensor.hp.winy])

    # reward: central RMS intensity, with penalties on speed (weight taken from the command line) and saccades
    reward = syc.Rewards(
        reward_types=['central_rms_intensity', 'speed', 'saccade'],
        relative_weights=[1.0, -float(sys.argv[1]), -200])
    # observation_size = sensor.hp.winx*sensor.hp.winy*2
    observation_size = 64*64
    RL = DeepQNetwork(len(agent.hp.action_space), observation_size,
                      n_features_shaped=list(np.shape(sensor.dvs_view))+[1],
                      shape_fun=lambda x: np.reshape(x,[-1]+list(np.shape(sensor.dvs_view))+[1]),
                      reward_decay=0.99,
                      e_greedy=0.95,
                      e_greedy0=0.8,
                      replace_target_iter=10,
                      memory_size=100000,
                      e_greedy_increment=0.0001,
                      learning_rate=0.0025,
                      double_q=True,
                      dqn_mode=True,
                      state_table=np.zeros([1,observation_size*hp.mem_depth]),
                      soft_q_type='boltzmann',
                      beta=0.1,
                      arch='conv'
                      )
    # RL.dqn.load_nwk_param('tempX_1.nwk')
    # RL.dqn.save_nwk_param('liron_encircle.nwk')
    hp.scene = scene.hp
    hp.sensor = sensor.hp
    hp.agent = agent.hp
    hp.reward = reward.hp
    hp.RL = RL.hp
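
The shape_fun above unflattens the flat observation vectors stored in replay memory back into image form for the 'conv' architecture. A minimal sketch of what it computes, assuming sensor.dvs_view is a 64x64 array (consistent with observation_size = 64*64 above, but an assumption here):

    import numpy as np

    dvs_shape = (64, 64)  # assumed shape of sensor.dvs_view
    shape_fun = lambda x: np.reshape(x, [-1] + list(dvs_shape) + [1])

    batch = np.zeros([32, 64 * 64])   # 32 flat observations, as stored in the replay memory
    print(shape_fun(batch).shape)     # (32, 64, 64, 1): NHWC input for the conv network
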
Example #3
    recorder = Recorder(n=5)

    scene = syc.Scene(image_matrix=np.array(images[0]).reshape([28, 28]))
    sensor = syc.Sensor()
    agent = syc.Agent(
        max_q=[scene.maxx - sensor.hp.winx, scene.maxy - sensor.hp.winy])

    reward = syc.Rewards()
    observation_size = sensor.hp.winx * sensor.hp.winy
    RL = DeepQNetwork(
        len(agent.hp.action_space),
        observation_size * hp.mem_depth,  #sensor.frame_size+2,
        reward_decay=0.99,
        e_greedy=0.9,
        e_greedy0=0.8,
        replace_target_iter=10,
        memory_size=100000,
        e_greedy_increment=0.0001,
        learning_rate=0.0025,
        double_q=False,
        dqn_mode=True,
        state_table=np.zeros([1, observation_size * hp.mem_depth]))

    hp.scene = scene.hp
    hp.sensor = sensor.hp
    hp.agent = agent.hp
    hp.reward = reward.hp
    hp.RL = RL.hp
    with open(hp_file, 'wb') as f:
        pickle.dump(hp, f)
    run_env()
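
The hyper-parameter record pickled above can be read back the same way, e.g. to inspect or reproduce a run. A minimal sketch, assuming hp_file is the same path the surrounding script defines and that the classes behind the stored hp objects are importable when unpickling:

    import pickle

    # reload the hyper-parameters saved before run_env()
    with open(hp_file, 'rb') as f:
        hp = pickle.load(f)
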
Example #4
    # end of game
    print('game over')
    #env.destroy()


if __name__ == "__main__":
    # maze game
    env = Image_env1(bmp_features=BMP_MODE)
    all_observations_for_mapping = (env.observation_space()
                                    if not BMP_MODE else None)
    RL = DeepQNetwork(
        env.n_actions,
        env.n_features,
        #learning_rate=0.00005,
        reward_decay=0.5,
        e_greedy=0.8,
        e_greedy0=0.5,
        replace_target_iter=20,
        memory_size=300000,
        # memory_size=1000,
        e_greedy_increment=0.001,
        # output_graph=True
        state_table=all_observations_for_mapping)
    run_img()
    # env.after(100, run_img)
    # print('-----------------------------------------')
    # env.mainloop()
    env.plot_reward()
    env.save_train_history()
    #RL.plot_cost()
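
run_img() here, like run_env() in the earlier examples, is not shown in these snippets. As a rough sketch only, assuming this DeepQNetwork exposes the common choose_action / store_transition / learn interface and that the environment follows a reset/step protocol (both assumptions, not confirmed by the code above), the loop such a helper typically implements looks like:

    # hypothetical sketch of a training loop behind run_img();
    # method and return-value names are assumptions
    def run_img(n_episodes=100):
        step = 0
        for episode in range(n_episodes):
            observation = env.reset()
            done = False
            while not done:
                action = RL.choose_action(observation)          # epsilon-greedy action from the eval network
                observation_, reward, done = env.step(action)   # advance the environment
                RL.store_transition(observation, action, reward, observation_)
                if step > 200 and step % 5 == 0:
                    RL.learn()                                  # sample a minibatch and update the network
                observation = observation_
                step += 1
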