if __name__ == "__main__":
    max_round = 30
    file_path = '000065.SZ_NormalData.csv'
    df = pd.read_csv(file_path)
    df = df.sort_values('trade_date', ascending=True)
    df = df.iloc[22:].reset_index(drop=True)  # drop the first days that have no moving-average data

    # train on the first 1500 trading days
    env = stock(df.iloc[0:1500])
    RL = DeepQNetwork(
        env.n_actions,
        env.n_features,
        learning_rate=0.02,
        reward_decay=0.9,
        e_greedy=0.9,
        replace_target_iter=200,
        memory_size=4000,
        batch_size=512,
        # output_graph=True
    )
    run(max_round)

    # env = stock(df)
    # env = BackTest(env, show_log=True)
    # env.draw('trade.png', 'profit.png')

    # backtest on the remaining days
    env = stock(df.iloc[1500:].reset_index(drop=True))
    env = BackTest(env, show_log=True)
    env.draw('trade1.png', 'profit1.png')
    env = BackTest(env, show_log=True, my_trick=True)
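# ----------------------------------------------------------------------
# Illustrative only: run(max_round) is defined elsewhere in this script.
# The sketch below shows the DQN interaction loop it is assumed to
# implement, using the module-level env and RL created above. It assumes
# the stock env exposes gym-style reset()/step() returning
# (observation, reward, done), and that DeepQNetwork provides
# choose_action / store_transition / learn; adjust to the real interfaces.
# ----------------------------------------------------------------------
def run(max_round):
    step = 0
    for episode in range(max_round):
        observation = env.reset()
        while True:
            action = RL.choose_action(observation)
            observation_, reward, done = env.step(action)
            RL.store_transition(observation, action, reward, observation_)
            if step > 200 and step % 5 == 0:
                RL.learn()  # start learning once the replay memory has warmed up
            observation = observation_
            step += 1
            if done:
                break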
import gym

from RL_brain2 import DeepQNetwork

env = gym.make('SpaceInvaders-v0')
env = env.unwrapped

print("action_space:", env.action_space)
print("observation_space:", env.observation_space)
print("observation_space.high:", env.observation_space.high)
print("observation_space.low:", env.observation_space.low)

RL = DeepQNetwork(
    n_actions=env.action_space.n,
    n_features=env.observation_space.shape[0]
    * env.observation_space.shape[1]
    * env.observation_space.shape[2],
    learning_rate=0.01,
    e_greedy=0.9,
    replace_target_iter=100,
    memory_size=2000,
    e_greedy_increment=0.001,
)

total_steps = 0
ep_rhistory = []

for i_episode in range(500):
    observation = env.reset()
    ep_r = 0
    while True:
        # env.render()
import argparse

import numpy as np
import tkinter as tk

from env import crossing
from visual import Visual
# NOTE: assumed module name for the local DQN implementation; adjust if it differs.
from RL_brain import DeepQNetwork

np.set_printoptions(threshold=np.inf)

# print(env.observation_space.shape[0])
parser = argparse.ArgumentParser(description='Train or test neural net motor controller.')
parser.add_argument('--train', dest='train', action='store_true', default=False)
parser.add_argument('--test', dest='test', action='store_true', default=True)
args = parser.parse_args()

RL = DeepQNetwork(
    n_actions=4,  # 2*2
    # n_features=env.observation_space.shape[0],
    n_features=10,  # 2*5
    learning_rate=0.01,
    e_greedy=0.9,
    replace_target_iter=100,
    memory_size=2000,
    e_greedy_increment=0.001,
)

def road_map():
    cross1 = crossing(light_state=0, q_states=[0, 0, 0, 1])
    cross2 = crossing(light_state=0, q_states=[0, 0, 1, 0])
    step_set = []
    reward_set = []
# print(env.observation_space.shape[0])
parser = argparse.ArgumentParser(
    description='Train or test neural net motor controller.')
parser.add_argument('--train', dest='train', action='store_true', default=True)
# parser.add_argument('--test', dest='test', action='store_true', default=True)
args = parser.parse_args()

# size of the crossroads grid
grid_x = 4
grid_y = 4

RL = DeepQNetwork(
    n_actions=2**(grid_x * grid_y),    # one 0/1 light action per crossroad
    n_features=5 * (grid_x * grid_y),  # 5 features per crossroad: 4 queue counts + 1 light state
    learning_rate=0.01,
    e_greedy=0.9,
    replace_target_iter=100,
    memory_size=2000,
    e_greedy_increment=0.001,
)

x = []
y = []
for i in range(grid_x):
    x.append(i + 1)
for i in range(grid_y):
    y.append(i + 1)

# properties of the visualization
times = 100  # interval between neighbouring crossroads
bias = 6     # distance between a light and the center of its crossroad
            print('success =', success)
            break

        # swap observation
        observation = observation_
        step += 1

    # end of game
    print('game over')

    plt.plot(np.arange(episode_number), rr_episode, '.')
    plt.ylabel('reward')
    plt.xlabel('training episode')
    plt.show()

    plt.plot(np.arange(episode_number), step_episode, '.')
    plt.ylabel('step')
    plt.xlabel('training episode')
    plt.show()


if __name__ == "__main__":
    # maze game
    env = UR5()
    RL = DeepQNetwork(4, 12,
                      learning_rate=0.00001,
                      reward_decay=0.5,
                      e_greedy=1,
                      replace_target_iter=3,
                      memory_size=4000,
                      output_graph=False)
    run()
    RL.plot_cost()