# Synthetic scene: a 28x28 image whose right half is bright, i.e. a single vertical edge.
vertical_edge_mat = np.zeros([28, 28])
vertical_edge_mat[:, 14:] = 1.0

recorder = Recorder(n=6)

# Debug matrix: identity plus the first superdiagonal.
debu2el = np.diag(np.ones([10 - 1]), k=1) + np.eye(10)
# debu2el = debu2el[:-1,:]

scene = syc.Scene(image_matrix=vertical_edge_mat)
sensor = syc.Sensor()
agent = syc.Agent(max_q=[scene.maxx - sensor.hp.winx, scene.maxy - sensor.hp.winy])
reward = syc.Rewards()

RL = DeepQNetwork(len(agent.hp.action_space),
                  sensor.hp.winx + 2,  # sensor.frame_size+2
                  reward_decay=0.9,
                  e_greedy=0.99,
                  e_greedy0=0.25,
                  replace_target_iter=10,
                  memory_size=30000,
                  e_greedy_increment=0.001,
                  state_table=None)

# Collect the hyperparameters of every component on the shared hp record.
hp.scene = scene.hp
hp.sensor = sensor.hp
hp.agent = agent.hp
hp.reward = reward.hp
hp.RL = RL.hp

run_env()
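# For reference, a minimal standalone numpy sketch of the structure produced by
# the debu2el line above: ones on the main diagonal and on the first
# superdiagonal (shown here for n=4, a smaller size chosen only for illustration).
import numpy as np

n = 4
demo = np.diag(np.ones([n - 1]), k=1) + np.eye(n)
print(demo)
# [[1. 1. 0. 0.]
#  [0. 1. 1. 0.]
#  [0. 0. 1. 1.]
#  [0. 0. 0. 1.]]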
# Scene built from a list of frames; the sensor produces a DVS-like (event) view.
scene = syc.Scene(frame_list=images)
sensor = syc.Sensor(log_mode=False, log_floor=1.0)
agent = syc.Agent(max_q=[scene.maxx - sensor.hp.winx, scene.maxy - sensor.hp.winy])
# Reward mixes intensity, speed and saccade terms; the speed weight is taken
# from the command line.
reward = syc.Rewards(reward_types=['central_rms_intensity', 'speed', 'saccade'],
                     relative_weights=[1.0, -float(sys.argv[1]), -200])

# observation_size = sensor.hp.winx*sensor.hp.winy*2
observation_size = 64 * 64

RL = DeepQNetwork(len(agent.hp.action_space),
                  observation_size,
                  n_features_shaped=list(np.shape(sensor.dvs_view)) + [1],
                  shape_fun=lambda x: np.reshape(x, [-1] + list(np.shape(sensor.dvs_view)) + [1]),
                  reward_decay=0.99,
                  e_greedy=0.95,
                  e_greedy0=0.8,
                  replace_target_iter=10,
                  memory_size=100000,
                  e_greedy_increment=0.0001,
                  learning_rate=0.0025,
                  double_q=True,
                  dqn_mode=True,
                  state_table=np.zeros([1, observation_size * hp.mem_depth]),
                  soft_q_type='boltzmann',
                  beta=0.1,
                  arch='conv')
# RL.dqn.load_nwk_param('tempX_1.nwk')
# RL.dqn.save_nwk_param('liron_encircle.nwk')

hp.scene = scene.hp
hp.sensor = sensor.hp
hp.agent = agent.hp
hp.reward = reward.hp
hp.RL = RL.hp
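# A standalone numpy sketch of what shape_fun above does: observations stored as
# flat vectors are reshaped into the [batch, height, width, channels] layout the
# 'conv' architecture expects. The 64x64 view size is an assumption taken from
# observation_size above, not read from the actual sensor.
import numpy as np

dvs_shape = (64, 64)                    # assumed shape of sensor.dvs_view
batch = np.random.rand(32, 64 * 64)    # 32 flattened observations
shaped = np.reshape(batch, [-1] + list(dvs_shape) + [1])
print(shaped.shape)                     # (32, 64, 64, 1)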
recorder = Recorder(n=5)

scene = syc.Scene(image_matrix=np.array(images[0]).reshape([28, 28]))
sensor = syc.Sensor()
agent = syc.Agent(max_q=[scene.maxx - sensor.hp.winx, scene.maxy - sensor.hp.winy])
reward = syc.Rewards()

observation_size = sensor.hp.winx * sensor.hp.winy
RL = DeepQNetwork(len(agent.hp.action_space),
                  observation_size * hp.mem_depth,  # sensor.frame_size+2
                  reward_decay=0.99,
                  e_greedy=0.9,
                  e_greedy0=0.8,
                  replace_target_iter=10,
                  memory_size=100000,
                  e_greedy_increment=0.0001,
                  learning_rate=0.0025,
                  double_q=False,
                  dqn_mode=True,
                  state_table=np.zeros([1, observation_size * hp.mem_depth]))

hp.scene = scene.hp
hp.sensor = sensor.hp
hp.agent = agent.hp
hp.reward = reward.hp
hp.RL = RL.hp

with open(hp_file, 'wb') as f:
    pickle.dump(hp, f)

run_env()
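# The hp record pickled above can be read back later (for example when analysing
# a finished run) with the standard pickle round trip; hp_file is the same path
# used in the dump above, and the attribute names mirror the assignments made there.
import pickle

with open(hp_file, 'rb') as f:
    hp_loaded = pickle.load(f)
print(hp_loaded.RL)  # the DeepQNetwork hyperparameters saved for this run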
    # end of game
    print('game over')
    # env.destroy()


if __name__ == "__main__":
    # maze game
    env = Image_env1(bmp_features=BMP_MODE)
    all_observations_for_mapping = env.observation_space() if not BMP_MODE else None
    RL = DeepQNetwork(env.n_actions,
                      env.n_features,
                      # learning_rate=0.00005,
                      reward_decay=0.5,
                      e_greedy=0.8,
                      e_greedy0=0.5,
                      replace_target_iter=20,
                      memory_size=300000,
                      # memory_size=1000,
                      e_greedy_increment=0.001,
                      # output_graph=True,
                      state_table=all_observations_for_mapping)
    run_img()
    # env.after(100, run_img)
    # print('-----------------------------------------')
    # env.mainloop()
    env.plot_reward()
    env.save_train_history()
    # RL.plot_cost()
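# A rough sketch of the kind of loop run_img() is expected to implement, assuming
# the Morvan-style DeepQNetwork interface (choose_action / store_transition /
# learn) and a Gym-like env.reset()/env.step(); the method names, step-return
# signature, and episode/step counts here are assumptions for illustration, not
# the project's actual implementation.
def run_img_sketch(env, RL, episodes=300):
    step = 0
    for episode in range(episodes):
        observation = env.reset()
        while True:
            action = RL.choose_action(observation)          # epsilon-greedy action
            observation_, reward, done = env.step(action)   # advance the environment
            RL.store_transition(observation, action, reward, observation_)
            if step > 200 and step % 5 == 0:
                RL.learn()                                   # update the Q-network
            observation = observation_
            step += 1
            if done:
                break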