def run_dqn(name_asset, n_features, n_neurons, n_episodes, batch_size, random_action_decay, future_reward_importance):
    # load the asset's closing prices as a list
    df = pd.read_csv(INPUT_CSV_TEMPLATE % name_asset)
    data = df['Close'].astype(float).tolist()  # https://www.kaggle.com/camnugent/sandp500
    l = len(data) - 1
    print(f'Running {n_episodes} episodes, on {name_asset} (has {l} rows), features={n_features}, '
          f'batch={batch_size}, random_action_decay={random_action_decay}')

    dqn = Dqn()
    profit_vs_episode, trades_vs_episode, epsilon_vs_episode, model_name, num_trains, eps = \
        dqn.learn(data, n_episodes, n_features, batch_size, USE_EXISTING_MODEL, RANDOM_ACTION_MIN,
                  random_action_decay, n_neurons, future_reward_importance)

    print(f'Learning completed. Backtest the model {model_name} on any stock')
    print('python backtest.py')

    print(f'see plot of profit_vs_episode = {profit_vs_episode[:10]}')
    plot_barchart(profit_vs_episode, "episode vs profit", "episode vs profit", "total profit", "episode", 'green')

    print(f'see plot of trades_vs_episode = {trades_vs_episode[:10]}')
    plot_barchart(trades_vs_episode, "episode vs trades", "episode vs trades", "total trades", "episode", 'blue')

    text = f'{name_asset} ({l}), features={n_features}, nn={n_neurons}, batch={batch_size}, ' \
           f'epi={n_episodes}({num_trains}), eps={np.round(eps, 1)}({np.round(random_action_decay, 5)})'
    print(f'see plot of epsilon_vs_episode = {epsilon_vs_episode[:10]}')
    plot_barchart(epsilon_vs_episode, "episode vs epsilon", "episode vs epsilon",
                  "epsilon (probability of random action)", text, 'red')
    print(text)
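# Hedged usage sketch: the asset name and hyperparameter values below are illustrative
# placeholders, not taken from the original project.
if __name__ == '__main__':
    run_dqn(name_asset='AAPL', n_features=10, n_neurons=64, n_episodes=100,
            batch_size=32, random_action_decay=0.99, future_reward_importance=0.95)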
def warehouse():
    from gymcolab.envs.warehouse import Warehouse
    from models import VanillaDqnModel

    args = dict(
        buffersize=100000,
        lr=0.001,
        target_update_ratio=100,
        gamma=0.99,
        episodes=10000,
        update_begin=75,
        init_eps=0.5,
        terminal_eps=0.0,
        batchsize=32,
    )
    device = "cuda"
    env = Warehouse()
    n_nodes, height, width = env.observation_space.shape
    n_act = env.action_space.n - 1
    assert height == width, "Environment map must be square"

    model = VanillaDqnModel(n_nodes, height, n_act)
    optim = torch.optim.Adam(model.parameters(), lr=args["lr"])
    dqn = Dqn(model, optim, args["buffersize"])
    dqn.to(device)
    learn(args, env, dqn)
def bt(data, num_features, use_existing_model, model_name):
    dqn = Dqn()
    agent = Agent(num_features, use_existing_model, model_name)
    state = dqn.get_state(data, num_features, num_features)
    total_profits = 0
    total_holds = 0
    total_buys = 1
    total_sells = 0
    l = len(data) - 1

    for t in range(num_features, l):
        action = agent.choose_best_action(state)  # it will always predict
        reward, total_profits, total_holds, total_buys, total_sells = dqn.execute_action(
            action, data[t], t, total_profits, total_holds, total_buys, total_sells)
        done = (t == l - 1)
        next_state = dqn.get_state(data, t + 1, num_features)
        print(f'row #{t} {agent.actions[action]} @{data[t]}, state1={state}, state2={next_state}, reward={reward}')
        state = next_state

        if done:
            print("-----------------------------------------")
            print(f'Total Profit: {formatPrice(total_profits)}, '
                  f'Total hold/buy/exit trades: {total_holds} / {total_buys} / {total_sells}')
            print("-----------------------------------------")
def warehouse_graph():
    from gymcolab.envs.warehouse import Warehouse
    from models import GraphDqnModel

    args = dict(
        buffersize=100000,
        lr=0.0001,
        target_update_ratio=100,
        gamma=0.99,
        episodes=10000,
        update_begin=75,
        init_eps=0.9,
        terminal_eps=0.1,
        batchsize=32,
    )
    device = "cuda"
    env = Warehouse()
    n_nodes, height, width = env.observation_space.shape
    n_act = env.action_space.n
    assert height == width, "Environment map must be square"

    # Fixed adjacency tensor: one (n_nodes x n_nodes) relation matrix per edge type
    adj = torch.tensor([[[0, 0, 0, 0],
                         [0, 0, 0, 0],
                         [1, 1, 0, 0],
                         [0, 0, 0, 0]],

                        [[0, 0, 0, 0],
                         [0, 0, 0, 0],
                         [0, 0, 0, 1],
                         [0, 0, 0, 0]],

                        [[0, 0, 0, 0],
                         [0, 0, 0, 0],
                         [0, 0, 0, 0],
                         [0, 1, 0, 0]]]).float().to(device)
    n_edges = 3

    class GraphDqnModelAdj(GraphDqnModel):
        # GraphDqnModel with the adjacency tensor baked in, so forward() only needs the object map
        def __init__(self, n_edges, n_nodes, mapsize, n_act, adj):
            super().__init__(n_edges, n_nodes, mapsize, n_act)
            self.adj = adj

        def forward(self, objmap):
            return super().forward(self.adj, objmap)

    model = GraphDqnModelAdj(n_edges, n_nodes, height, n_act, adj)
    optim = torch.optim.Adam(model.parameters(), lr=args["lr"])
    dqn = Dqn(model, optim, args["buffersize"])
    dqn.to(device)
    learn(args, env, dqn)
def cartpole():
    args = dict(buffersize=100000,
                lr=0.001,
                target_update_ratio=200,
                gamma=0.99,
                episodes=1000,
                update_begin=78,
                init_eps=0.7,
                terminal_eps=0.1,
                batchsize=32)
    env = gym.make("CartPole-v1")
    in_size = env.observation_space.shape[0]
    n_act = env.action_space.n

    model = FullyConnectedModel(in_size, n_act)
    optim = torch.optim.Adam(model.parameters(), lr=args["lr"])
    dqn = Dqn(model, optim, args["buffersize"])
    learn(args, env, dqn)
def bt(data, n_features, use_existing_model, name_model):
    dqn = Dqn()
    dqn.open_orders = [data[0]]
    agent = Agent(n_features, use_existing_model, name_model)
    state = dqn.get_state(data, n_features, n_features)
    total_profits = 0
    total_holds = 0
    total_buys = 1
    total_sells = 0
    total_notvalid = 0
    l = len(data) - 1

    for t in range(n_features, l):
        action = agent.choose_best_action(state)  # it will always predict
        reward, total_profits, total_holds, total_buys, total_sells, total_notvalid = \
            dqn.execute_action(action, data[t], t, total_profits, total_holds, total_buys,
                               total_sells, total_notvalid)
        done = (t == l - 1)
        next_state = dqn.get_state(data, t + 1, n_features)
        # print(f'row #{t} {agent.actions[action]} @{data[t]}, state1={state}, state2={next_state}, reward={reward}')
        state = next_state

        if done:
            # sell position at end of episode
            reward, total_profits, total_holds, total_buys, total_sells, total_notvalid = \
                dqn.execute_action(2, data[t + 1], t + 1, total_profits, total_holds, total_buys,
                                   total_sells, total_notvalid)
            print("-----------------------------------------")
            print(f'Total Profit: {formatPrice(total_profits * 100)}, '
                  f'Total hold/buy/sell/notvalid trades: {total_holds} / {total_buys} / {total_sells} / {total_notvalid}')
            print("-----------------------------------------")
batch_size = 1  # (int) > 0, size of a batch sampled from the replay buffer for training
random_action_decay = 0.8993  # (float) 0-1
future_reward_importance = 0.9500  # (float) 0-1, aka decay or discount rate; determines the importance of future
# rewards. If 0, the agent only learns to consider current rewards; if 1, it strives for a long-term high reward.

# do not touch these params
random_action_min = 0.0  # (float) 0-1, do not touch this
use_existing_model = False  # (bool) do not touch this

data = getStockDataVec(stock_name)  # https://www.kaggle.com/camnugent/sandp500
l = len(data) - 1
print(f'Running {episodes} episodes, on {stock_name} (has {l} rows), features={num_features}, '
      f'batch={batch_size}, random_action_decay={random_action_decay}')

dqn = Dqn()
profit_vs_episode, trades_vs_episode, epsilon_vs_episode, model_name, num_trains, eps = \
    dqn.learn(data, episodes, num_features, batch_size, use_existing_model, random_action_min,
              random_action_decay, num_neurons, future_reward_importance)

print(f'Learning completed. Backtest the model {model_name} on any stock')
print('python backtest.py')

minutes = np.round((time.time() - start_time) / 60, 1)  # elapsed time in minutes
text = f'{stock_name} ({l}), t={minutes}, features={num_features}, nn={num_neurons}, batch={batch_size}, ' \
       f'epi={episodes}({num_trains}), eps={np.round(eps, 1)}({np.round(random_action_decay, 5)})'
print(f'see plot of profit_vs_episode = {profit_vs_episode[:10]}')
plot_barchart(profit_vs_episode, "episode vs profit", "episode vs profit", "total profit", "episode", 'green')
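# Hedged sketch (not from the original script): a tiny illustration of what
# future_reward_importance (the discount factor) does. The reward sequence is made up
# for the example and the helper name is hypothetical.
def discounted_return(rewards, gamma):
    # sum of gamma**k * r_k over the reward sequence
    return sum((gamma ** k) * r for k, r in enumerate(rewards))

print(discounted_return([1.0, 1.0, 1.0, 1.0], 0.95))  # ~3.71: future rewards still count
print(discounted_return([1.0, 1.0, 1.0, 1.0], 0.0))   # 1.0: only the immediate reward counts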
def __init__(self, env):
    Dqn.__init__(self, env)
    self.save_path = 'app/saves/ddqn'
    self.train_network = self.create_network()
    self.target_network = self.create_network()
    # start the target network with the same weights as the online (train) network
    self.target_network.set_weights(self.train_network.get_weights())
def done_update(self, episode, score):
    Dqn.done_update(self, episode, score)
    # sync the target network with the online network at the end of every episode
    self.target_network.set_weights(self.train_network.get_weights())
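# Hedged sketch (assumption, not taken from this class): how the two networks kept in sync
# above are typically used to form Double DQN targets. The batch arrays and the Keras-style
# predict() calls are assumptions about the surrounding code, not its actual API.
import numpy as np

def double_dqn_targets(train_network, target_network, states, actions, rewards,
                       next_states, dones, gamma=0.99):
    q_next_online = train_network.predict(next_states, verbose=0)   # online net selects actions
    q_next_target = target_network.predict(next_states, verbose=0)  # target net evaluates them
    best_actions = np.argmax(q_next_online, axis=1)
    targets = train_network.predict(states, verbose=0)
    for i in range(len(states)):
        bootstrap = 0.0 if dones[i] else gamma * q_next_target[i, best_actions[i]]
        targets[i, actions[i]] = rewards[i] + bootstrap
    return targets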
maxMemory = 60000
gamma = 0.9
batchSize = 32
nLastStates = 4

epsilon = 1.
epsilonDecayRate = 0.0002
minLastEpsilon = 0.05

filePathToSave = 'model2.h5'

# Initializing the environment, the brain and the Experience Replay Memory
env = Environment(0)
brain = Brain((env.nColumns, env.nRows, nLastStates), learningRate)
model = brain.model
DQN = Dqn(maxMemory, gamma)

# Building a function that will reset current state and next state
def resetStates():
    currentState = np.zeros((1, env.nColumns, env.nRows, nLastStates))
    for i in range(nLastStates):
        currentState[0, :, :, i] = env.screenMap
    # Return current state and next state, which are the same at the beginning
    return currentState, currentState

# Starting the main loop
epoch = 0
nCollected = 0
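# Hedged sketch (assumption, not from the original file): a common way to advance the stacked
# state once the environment returns a new screenMap is to drop the oldest channel and append
# the new frame as the last one. The helper name and argument names are hypothetical.
def updateStates(currentState, newScreenMap):
    frame = newScreenMap.reshape((1, env.nColumns, env.nRows, 1))
    nextState = np.append(currentState[:, :, :, 1:], frame, axis=3)
    return currentState, nextState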
import numpy as np

from kivy.lang import Builder
from kivy.uix.widget import Widget
from kivy.config import Config
from kivy.properties import NumericProperty, ReferenceListProperty, ObjectProperty
from kivy.vector import Vector
from kivy.uix.button import Button
from kivy.graphics import Color, Ellipse, Line

from dqn import Dqn

Config.set('input', 'mouse', 'mouse,multitouch_on_demand')

last_x = 0
last_y = 0
n_points = 0
length = 0

brain = Dqn(5, 3, 0.9)  # 5 sensor inputs, 3 actions, gamma = 0.9
a_rotation = [0, 20, -20]
last_reward = 0
scores = []

first_update = True

def init():
    global sand
    global goal_x
    global goal_y
    global first_update
    sand = np.zeros((longueur, largeur))
    goal_x = 20
    goal_y = largeur - 20
    first_update = False
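# Hedged sketch (assumption about this project's Dqn API, which is not shown in the fragment):
# on each frame the brain is usually fed the last reward plus the current sensor signal and
# returns an action index, which a_rotation maps to a steering angle in degrees.
def choose_rotation(last_signal):
    action = brain.update(last_reward, last_signal)  # assumed Dqn method: (reward, signal) -> action index
    return a_rotation[action]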
def load():
    # NOTE: this snippet starts mid-function; the enclosing `if` below is an assumption that
    # the original code checks whether the checkpoint file exists (requires `import os` above).
    loadednn = None
    if os.path.isfile('saved_learning.nn'):
        loadednn = keras.models.load_model('saved_learning.nn')
    else:
        print("no checkpoint found...")
    return loadednn

################################################################
################################################################

env = gym.make('LunarLander-v2')  # create the gym environment
print(env.action_space.n)
print(env.observation_space.shape[0])

nnet = Dqn(env.action_space, env.observation_space.shape[0], LEARN_NN_SIZE, LEARN_EPS_DECAY,
           LEARN_ACTION_DEPTH, LEARN_ACTION_DISCOUNT)
if LEARN_RELOAD:
    nnet.nn = load()

for episode in range(NB_EPISODES):
    print('------- Episode : ' + str(episode))
    episode_rewards = 0
    state = State(env.reset())  # initial state of the environment
    action = Action.DO_NOTHING
    learn = episode <= LEARN_EPISODES
    for step in range(NB_STEPS_PER_EPISODE):  # max number of steps for one episode
from kivy.clock import Clock

# Importing the Dqn object from our AI in dqn.py
from dqn import Dqn

# Adding this line if we don't want the right click to put a red point
Config.set('input', 'mouse', 'mouse,multitouch_on_demand')

# Introducing last_x and last_y, used to keep the last point in memory when we draw the sand on the map
last_x = 0
last_y = 0
n_points = 0  # the total number of points in the last drawing
length = 0  # the length of the last drawing

# Getting our AI, which we call "brain", and that contains our neural network that represents our Q-function
brain = Dqn(5, 3, 0.9)  # 5 sensors, 3 actions, gamma = 0.9
# action = 0 => no rotation, action = 1 => rotate 20 degrees, action = 2 => rotate -20 degrees
action2rotation = [0, 20, -20]
last_reward = 0  # initializing the last reward
# initializing the mean score curve (sliding window of the rewards) with respect to time
scores = []

# Initializing the map
first_update = True  # using this trick to initialize the map only once

def init():
    # sand is an array that has as many cells as our graphic interface has pixels.
    # Each cell holds 1 if there is sand, 0 otherwise.
    global sand
    # x-coordinate of the goal (where the car has to go, that is the airport or the downtown)
    global goal_x