Example #1
def run_dqn(name_asset, n_features, n_neurons, n_episodes, batch_size,
            random_action_decay, future_reward_importance):

    # load the stock's closing prices into a list
    df = pd.read_csv(INPUT_CSV_TEMPLATE % name_asset)
    data = df['Close'].astype(float).tolist()  # https://www.kaggle.com/camnugent/sandp500
    l = len(data) - 1

    print(
        f'Running {n_episodes} episodes, on {name_asset} (has {l} rows), features={n_features}, '
        f'batch={batch_size}, random_action_decay={random_action_decay}')
    dqn = Dqn()
    profit_vs_episode, trades_vs_episode, epsilon_vs_episode, model_name, num_trains, eps = \
        dqn.learn(data, n_episodes, n_features, batch_size, USE_EXISTING_MODEL, RANDOM_ACTION_MIN,
                  random_action_decay, n_neurons, future_reward_importance)

    print(f'Learning completed. Backtest the model {model_name} on any stock')
    print('python backtest.py ')

    print(f'see plot of profit_vs_episode = {profit_vs_episode[:10]}')
    plot_barchart(profit_vs_episode, "episode vs profit", "episode vs profit",
                  "total profit", "episode", 'green')

    print(f'see plot of trades_vs_episode = {trades_vs_episode[:10]}')
    plot_barchart(trades_vs_episode, "episode vs trades", "episode vs trades",
                  "total trades", "episode", 'blue')

    text = f'{name_asset} ({l}), features={n_features}, nn={n_neurons},batch={batch_size}, ' \
           f'epi={n_episodes}({num_trains}), eps={np.round(eps, 1)}({np.round(random_action_decay, 5)})'
    print(f'see plot of epsilon_vs_episode = {epsilon_vs_episode[:10]}')
    plot_barchart(epsilon_vs_episode, "episode vs epsilon",
                  "episode vs epsilon",
                  "epsilon(probability of random action)", text, 'red')
    print(text)
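The RANDOM_ACTION_MIN and random_action_decay arguments drive an epsilon-greedy exploration schedule inside Dqn.learn (not shown here). A minimal sketch of such a schedule, assuming the decay is applied once per episode:

epsilon = 1.0                    # start fully exploratory
epsilon_min = 0.0                # corresponds to RANDOM_ACTION_MIN above
decay = 0.8993                   # corresponds to random_action_decay (example value)
epsilon_per_episode = []
for episode in range(100):
    epsilon_per_episode.append(epsilon)
    epsilon = max(epsilon_min, epsilon * decay)  # shrink the probability of a random action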
Example #2
def warehouse():
    from gymcolab.envs.warehouse import Warehouse
    from models import VanillaDqnModel

    args = dict(
        buffersize=100000,
        lr=0.001,
        target_update_ratio=100,
        gamma=0.99,
        episodes=10000,
        update_begin=75,
        init_eps=0.5,
        terminal_eps=0.0,
        batchsize=32,
    )
    device = "cuda"

    env = Warehouse()
    n_nodes, height, width = env.observation_space.shape
    n_act = env.action_space.n - 1
    assert height == width, "Environment map must be square"
    model = VanillaDqnModel(n_nodes, height, n_act)
    optim = torch.optim.Adam(model.parameters(), lr=args["lr"])
    dqn = Dqn(model, optim, args["buffersize"])
    dqn.to(device)
    learn(args, env, dqn)
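The learn(args, env, dqn) helper called in these examples is not shown. Below is a rough sketch of a loop consistent with the keys in args; the dqn methods used here (act, push, update, sync_target) are assumptions for illustration, not the project's actual API:

def learn(args, env, dqn):
    eps = args["init_eps"]
    eps_step = (args["init_eps"] - args["terminal_eps"]) / args["episodes"]
    step = 0
    for episode in range(args["episodes"]):
        state = env.reset()
        done = False
        while not done:
            action = dqn.act(state, eps)                       # epsilon-greedy action (assumed API)
            next_state, reward, done, _ = env.step(action)
            dqn.push(state, action, reward, next_state, done)  # store the transition (assumed API)
            state = next_state
            step += 1
            if episode >= args["update_begin"]:                # let the replay buffer warm up first
                dqn.update(args["batchsize"], args["gamma"])   # one gradient step (assumed API)
            if step % args["target_update_ratio"] == 0:
                dqn.sync_target()                              # copy online weights to the target net (assumed API)
        eps = max(args["terminal_eps"], eps - eps_step)        # linear epsilon decay per episode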
Example #3
def bt(data, num_features, use_existing_model, model_name):
    dqn = Dqn()
    agent = Agent(num_features, use_existing_model, model_name)
    state = dqn.get_state(data, num_features, num_features)
    total_profits = 0
    total_holds = 0
    total_buys = 1
    total_sells = 0
    l = len(data) - 1

    for t in range(num_features, l):

        action = agent.choose_best_action(state)  # it will always predict

        reward, total_profits, total_holds, total_buys, total_sells = \
            dqn.execute_action(action, data[t], t, total_profits, total_holds, total_buys, total_sells)

        done = True if t == l - 1 else False

        next_state = dqn.get_state(data, t + 1, num_features)
        print(f'row #{t} {agent.actions[action]} @{data[t]}, state1={state}, state2={next_state}, reward={reward}')
        state = next_state

        if done:
            print("-----------------------------------------")
            print(f'Total Profit: {formatPrice(total_profits)} , Total hold/buy/exit trades: {total_holds} / {total_buys} / {total_sells}')
            print("-----------------------------------------")
Example #4
def warehouse_graph():
    from gymcolab.envs.warehouse import Warehouse
    from models import GraphDqnModel

    args = dict(
        buffersize=100000,
        lr=0.0001,
        target_update_ratio=100,
        gamma=0.99,
        episodes=10000,
        update_begin=75,
        init_eps=0.9,
        terminal_eps=0.1,
        batchsize=32,
    )
    device = "cuda"

    env = Warehouse()
    n_nodes, height, width = env.observation_space.shape
    n_act = env.action_space.n
    assert height == width, "Environment map must be square"
    adj = torch.tensor([[[0, 0, 0, 0], [0, 0, 0, 0], [1, 1, 0, 0],
                         [0, 0, 0, 0]],
                        [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 1],
                         [0, 0, 0, 0]],
                        [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0],
                         [0, 1, 0, 0]]]).float().to(device)
    n_edges = 3

    class GraphDqnModelAdj(GraphDqnModel):
        def __init__(self, n_edges, n_nodes, mapsize, n_act, adj):
            super().__init__(n_edges, n_nodes, mapsize, n_act)
            self.adj = adj

        def forward(self, objmap):
            return super().forward(self.adj, objmap)

    model = GraphDqnModelAdj(n_edges, n_nodes, height, n_act, adj)
    optim = torch.optim.Adam(model.parameters(), lr=args["lr"])
    dqn = Dqn(model, optim, args["buffersize"])
    dqn.to(device)
    learn(args, env, dqn)
Example #5
def cartpole():
    args = dict(buffersize=100000,
                lr=0.001,
                target_update_ratio=200,
                gamma=0.99,
                episodes=1000,
                update_begin=78,
                init_eps=0.7,
                terminal_eps=0.1,
                batchsize=32)

    env = gym.make("CartPole-v1")
    in_size = env.observation_space.shape[0]
    n_act = env.action_space.n
    model = FullyConnectedModel(in_size, n_act)
    optim = torch.optim.Adam(model.parameters(), lr=args["lr"])
    dqn = Dqn(model, optim, args["buffersize"])

    learn(args, env, dqn)
Example #6
def bt(data, n_features, use_existing_model, name_model):
    dqn = Dqn()
    dqn.open_orders = [data[0]]
    agent = Agent(n_features, use_existing_model, name_model)
    state = dqn.get_state(data, n_features, n_features)
    total_profits = 0
    total_holds = 0
    total_buys = 1
    total_sells = 0
    total_notvalid = 0
    l = len(data) - 1

    for t in range(n_features, l):

        action = agent.choose_best_action(state)  # it will always predict

        reward, total_profits, total_holds, total_buys, total_sells, total_notvalid = \
            dqn.execute_action(action, data[t], t, total_profits, total_holds, total_buys, total_sells, total_notvalid)

        done = True if t == l - 1 else False

        next_state = dqn.get_state(data, t + 1, n_features)
        #print(f'row #{t} {agent.actions[action]} @{data[t]}, state1={state}, state2={next_state}, reward={reward}')
        state = next_state

        if done:
            # sell position at end of episode
            reward, total_profits, total_holds, total_buys, total_sells, total_notvalid = \
                dqn.execute_action(2, data[t+1], t+1, total_profits, total_holds, total_buys, total_sells,
                                   total_notvalid)
            print("-----------------------------------------")
            print(
                f'Total Profit: {formatPrice(total_profits*100)} ,'
                f' Total hold/buy/sell/notvalid trades: {total_holds} / {total_buys} / {total_sells} / {total_notvalid}'
            )
            print("-----------------------------------------")
Example #7
batch_size = 1  # (int) > 0, size of a batch sampled from the replay buffer for training
random_action_decay = 0.8993  # (float) 0-1
future_reward_importance = 0.9500  # (float) 0-1, aka discount rate (gamma); determines the importance of future
# rewards. If 0, the agent only learns to consider immediate rewards; if 1, it strives for long-term reward.
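# Illustration (not part of the original script): with future_reward_importance = 0.95,
# a reward that arrives 10 steps from now is weighted 0.95 ** 10, roughly 0.60, relative
# to an immediate reward, so long-term profit still matters but is discounted.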

# do not touch these params
random_action_min = 0.0  # (float) 0-1
use_existing_model = False  # (bool)
data = getStockDataVec(stock_name)  # https://www.kaggle.com/camnugent/sandp500
l = len(data) - 1
print(
    f'Running {episodes} episodes, on {stock_name} (has {l} rows), features={num_features}, batch={batch_size}, random_action_decay={random_action_decay}'
)

dqn = Dqn()
profit_vs_episode, trades_vs_episode, epsilon_vs_episode, model_name, num_trains, eps = \
    dqn.learn(data, episodes, num_features, batch_size, use_existing_model, random_action_min, random_action_decay,
              num_neurons, future_reward_importance)

print(f'Learning completed. Backtest the model {model_name} on any stock')
print('python backtest.py ')
minutes = np.round((time.time() - start_time) / 60, 1)  # minutes
text = f'{stock_name} ({l}),t={minutes}, features={num_features}, nn={num_neurons},batch={batch_size}, epi={episodes}({num_trains}), eps={np.round(eps, 1)}({np.round(random_action_decay, 5)})'

print(f'see plot of profit_vs_episode = {profit_vs_episode[:10]}')
plot_barchart(profit_vs_episode, "episode vs profit", "episode vs profit",
              "total profit", "episode", 'green')
Example #8
def __init__(self, env):
    Dqn.__init__(self, env)
    self.save_path = 'app/saves/ddqn'
    self.train_network = self.create_network()
    self.target_network = self.create_network()
    self.target_network.set_weights(self.train_network.get_weights())
Example #9
def done_update(self, episode, score):
    Dqn.done_update(self, episode, score)
    self.target_network.set_weights(self.train_network.get_weights())
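Examples #8 and #9 come from a Double DQN subclass: target_network is a second copy of the Q-network whose weights are re-synchronised with train_network after each episode. The payoff is in how training targets are computed; a hypothetical Keras-style sketch (the replay/target code itself is not shown in these examples):

import numpy as np

def ddqn_targets(self, states, actions, rewards, next_states, dones, gamma=0.99):
    # Double DQN: the online network picks the next action, the target network scores it
    q_next_online = self.train_network.predict(next_states, verbose=0)
    q_next_target = self.target_network.predict(next_states, verbose=0)
    best_next = np.argmax(q_next_online, axis=1)
    targets = self.train_network.predict(states, verbose=0)  # start from current predictions
    idx = np.arange(len(actions))
    # dones: 1.0 where the episode ended, else 0.0
    targets[idx, actions] = rewards + gamma * (1.0 - dones) * q_next_target[idx, best_next]
    return targets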
Example #10
maxMemory = 60000
gamma = 0.9
batchSize = 32
nLastStates = 4

epsilon = 1.
epsilonDecayRate = 0.0002
minLastEpsilon = 0.05

filePathToSave = 'model2.h5'

# Initializing the environment, the brain and the Experience Replay Memory
env = Environment(0)
brain = Brain((env.nColumns, env.nRows, nLastStates), learningRate)
model = brain.model
DQN = Dqn(maxMemory, gamma)


# Building a function that will reset the current state and the next state
def resetStates():
    currentState = np.zeros((1, env.nColumns, env.nRows, nLastStates))

    for i in range(nLastStates):
        currentState[0, :, :, i] = env.screenMap

    return currentState, currentState  # current state and next state are identical at the beginning


# Starting the main loop
epoch = 0
nCollected = 0
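resetStates() stacks nLastStates copies of the initial screen map along the last axis. During play the stack is typically shifted by one frame per step; a hypothetical helper (not part of the snippet above) could look like:

def updateStates(currentState, newFrame):
    # drop the oldest frame and append the newest screen map along the channel axis
    nextState = np.empty_like(currentState)
    nextState[0, :, :, :-1] = currentState[0, :, :, 1:]
    nextState[0, :, :, -1] = newFrame
    return nextState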
Example #11
import numpy as np

from kivy.lang import Builder
from kivy.uix.widget import Widget
from kivy.config import Config
from kivy.properties import NumericProperty, ReferenceListProperty, ObjectProperty
from kivy.vector import Vector
from kivy.uix.button import Button
from kivy.graphics import Color, Ellipse, Line
from dqn import Dqn

Config.set('input', 'mouse', 'mouse,multitouch_on_demand')

last_x = 0
last_y = 0
n_points = 0
length = 0
brain = Dqn(5, 3, 0.9)
a_rotation = [0, 20, -20]
last_reward = 0
scores = []
first_update = True


def init():
    global sand
    global goal_x
    global goal_y
    global first_update
    sand = np.zeros((longueur, largeur))
    goal_x = 20
    goal_y = largeur - 20
    first_update = False
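brain = Dqn(5, 3, 0.9) creates an agent that expects a 5-dimensional sensor signal, chooses between 3 actions and discounts future rewards with gamma = 0.9. A hypothetical per-frame call pattern (the update method and the signal layout are assumptions, not shown in this example):

last_signal = [0.0, 0.0, 0.0, 0.0, 0.0]          # 5 sensor readings (placeholder values)
action = brain.update(last_reward, last_signal)  # assumed API: previous reward + new signal -> action index
rotation = a_rotation[action]                    # map the action index to 0, +20 or -20 degrees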
Example #12
        loadednn = keras.models.load_model('saved_learning.nn')
    else:
        print("no checkpoint found...")

    return loadednn


################################################################
################################################################

env = gym.make('LunarLander-v2')  # create gym the environment

print(env.action_space.n)
print(env.observation_space.shape[0])

nnet = Dqn(env.action_space, env.observation_space.shape[0], LEARN_NN_SIZE,
           LEARN_EPS_DECAY, LEARN_ACTION_DEPTH, LEARN_ACTION_DISCOUNT)

if LEARN_RELOAD:
    nnet.nn = load()

for episode in range(NB_EPISODES):

    print('------- Episode : ' + str(episode))
    episode_rewards = 0
    state = State(env.reset())  # initial state of the environment
    action = Action.DO_NOTHING

    learn = episode <= LEARN_EPISODES

    for step in range(NB_STEPS_PER_EPISODE):  # max number of steps for one episode
Example #13
from kivy.clock import Clock

# Importing the Dqn object from our AI in ia.py
from dqn import Dqn

# Adding this line so that a right click does not put a red point
Config.set('input', 'mouse', 'mouse,multitouch_on_demand')

# Introducing last_x and last_y, used to keep the last point in memory when we draw the sand on the map
last_x = 0
last_y = 0
n_points = 0  # the total number of points in the last drawing
length = 0  # the length of the last drawing

# Getting our AI, which we call "brain", and that contains our neural network that represents our Q-function
brain = Dqn(5, 3, 0.9)  # 5 sensors, 3 actions, gamma = 0.9
# action = 0 => no rotation, action = 1 => rotate 20 degrees, action = 2 => rotate -20 degrees
action2rotation = [0, 20, -20]
last_reward = 0  # initializing the last reward
# initializing the mean score curve (sliding window of the rewards) with respect to time
scores = []

# Initializing the map
first_update = True  # using this trick to initialize the map only once


def init():
    # sand is an array with as many cells as our graphic interface has pixels. Each cell holds 1 if there is sand, 0 otherwise.
    global sand
    # x-coordinate of the goal (where the car has to go, that is the airport or the downtown)
    global goal_x