Code example #1
    # disable GPU
    os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

    # specify what to test
    delta_action_test = False
    bartlett_action_test = False

    # specify weights file to load
    tag = "49"

    # set init_ttm, spread, and other parameters to match the env the model was trained on
    env_test = TradingEnv(continuous_action_flag=True,
                          sabr_flag=True,
                          dg_random_seed=2,
                          spread=0.01,
                          num_contract=1,
                          init_ttm=20,
                          trade_freq=1,
                          num_sim=100001)
    ddpg_test = DDPG(env_test)

    print("\n\n***")
    if delta_action_test:
        print("Testing delta actions.")
    else:
        print("Testing agent actions.")
        if tag == "":
            print("tesing the model saved at the end of the training.")
        else:
            print("Testing model saved at " + tag + "K episode.")
        ddpg_test.load(tag=tag)
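
Once the weights are loaded, evaluation typically rolls the policy over the simulated paths and records the outcome of each one. The sketch below is illustrative only: it assumes a Gym-style reset()/step() interface on env_test and that ddpg_test.act(state) returns the hedge action, neither of which is confirmed by the snippet above.

    # evaluation sketch (assumed Gym-style API; not code from the original repository)
    path_rewards = []
    for _ in range(100):  # illustrative number of evaluation paths
        state = env_test.reset()
        done, total_reward = False, 0.0
        while not done:
            state, reward, done, info = env_test.step(ddpg_test.act(state))
            total_reward += reward
        path_rewards.append(total_reward)
    print("mean reward per path:", sum(path_rewards) / len(path_rewards))
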
Code example #2
File: run.py  Project: Rinuys/RLTradingBot
                      help='either "train" or "test"')
  parser.add_argument('-w', '--weights', type=str, help='a trained model weights')
  args = parser.parse_args()

  maybe_make_dir('weights')
  maybe_make_dir('portfolio_val')

  timestamp = time.strftime('%Y%m%d%H%M')

  data = np.around(get_data())
  data_size = data.shape[1]
  data_cut_point = int(0.75*data_size)
  train_data = data[:, :data_cut_point]
  test_data = data[:, data_cut_point:]

  env = TradingEnv(train_data, args.initial_invest)
  state_size = env.observation_space.shape
  action_size = env.action_space.n
  agent = DQNAgent(state_size, action_size)
  scaler = get_scaler(env)

  portfolio_value = []

  if args.mode == 'test':
    # remake the env with test data
    env = TradingEnv(test_data, args.initial_invest)
    # load trained weights
    agent.load(args.weights)
    # when testing, reuse the timestamp from when the weights were trained
    timestamp = re.findall(r'\d{12}', args.weights)[0]
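
The snippet above calls two helpers, maybe_make_dir and get_scaler, that are defined elsewhere in the project (utils in code example #4's imports). A minimal sketch of what such helpers commonly look like in this family of repos follows; the actual utils.py may differ, and the random-action scaler fit in particular is an assumption.

import os
from sklearn.preprocessing import StandardScaler

def maybe_make_dir(directory):
    # create the directory if it does not already exist
    if not os.path.exists(directory):
        os.makedirs(directory)

def get_scaler(env):
    # run one episode with random actions and fit a scaler on the visited states
    states = [env.reset()]
    done = False
    while not done:
        state, reward, done, info = env.step(env.action_space.sample())
        states.append(state)
    scaler = StandardScaler()
    scaler.fit(states)
    return scaler
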
Code example #3
File: run.py  Project: chuddster/dqn-trader
    data_size = data[0].shape[0]
    end_row_train = int(data_size * (args.ratio / 100))
    end_row_validate = (data_size - end_row_train) // 2 + end_row_train

    train_data = np.array([d[:end_row_train, :] for d in data])
    validation_data = np.array(
        [d[end_row_train:end_row_validate, :] for d in data])
    test_data = np.array([d[end_row_validate:, :] for d in data])
    '''
    print("There are {} rows".format(data_size))
    print("The training data spans from 0 to {}".format(end_row_train-1))
    print("The validation data spans from {} to {}".format(end_row_train, end_row_validate-1))
    print("The test data spans from {} to {}".format(end_row_validate, data_size))
    '''

    env = TradingEnv(train_data, args.initial_invest)
    state_size = env.observation_space.shape
    action_size = env.action_space.n
    agent = DQNAgent(state_size, action_size, N_HIDDEN_LAYERS)
    scaler = get_scaler(env)

    portfolio_value = []

    # Append initial account value
    portfolio_value.append(args.initial_invest)

    if args.mode != 'train':
        # remake the env with validation data
        env = TradingEnv(
            validation_data if args.mode == 'validate' else test_data,
            args.initial_invest)
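
To make the three-way split concrete, here is the same arithmetic with illustrative numbers (1000 rows and --ratio 70):

data_size = 1000                                                      # illustrative
end_row_train = int(data_size * (70 / 100))                           # 700
end_row_validate = (data_size - end_row_train) // 2 + end_row_train   # 850
# rows 0..699 -> training, 700..849 -> validation, 850..999 -> test
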
Code example #4
from flask import Flask, render_template, request, jsonify
import pickle
import time
import numpy as np
import argparse
import re
import itertools

from envs import TradingEnv
from agent import DQNAgent
from utils import get_data, get_scaler, maybe_make_dir

app = Flask(__name__)

data, pred, pred5 = get_data()
env = TradingEnv(data, pred, pred5, 20000)
state_size = env.observation_space.shape
action_size = env.action_space.n
action_combo = list(map(list, itertools.product([0, 1, 2], repeat=3)))
action_map = {0: "sell", 1: "hold", 2: "buy"}


@app.route('/')
def index():
    return render_template('index.html')


@app.route('/', methods=['POST'])
def advise():
    n1 = float(request.form['n1'])
    n2 = float(request.form['n2'])
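
In this Flask app, action_combo enumerates every {sell, hold, buy} combination across what appear to be three tracked assets (repeat=3), so a discrete action index from the agent can be decoded back into per-asset advice. A small illustrative decoding, not part of the original file:

action_idx = 14  # illustrative agent output
decoded = [action_map[a] for a in action_combo[action_idx]]
print(decoded)  # ['hold', 'hold', 'buy']
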
Code example #5
def train():
    profits_list = []  # collects (total_profit, percent_return) per episode during training

    # Given command line input as below

    # if len(sys.argv) != 4:
    #     print("Usage: python train.py [stock] [window] [episodes]")
    #     exit()

    with open(os.path.join(os.path.dirname(__file__), 'config.yml'),
              'r') as stream:
        config = yaml.safe_load(stream)

    # Unpackage data from terminal/config
    # stock_name, window_size, episode_count = sys.argv[1], int(sys.argv[2]), int(sys.argv[3])
    stock_name = config['stock_name']
    window_size = config['window_size']
    episode_count = config['num_epochs']

    num_tech_indicators = config['num_tech_indicators']
    agent = Agent(window_size + num_tech_indicators, config)
    data = getStockDataVec(stock_name)
    env = TradingEnv(data, window_size)
    l = len(data) - 1

    for e in range(episode_count + 1):
        print("Episode " + str(e) + "/" + str(episode_count))
        state = env.get_state(0)

        env.reset_holdings()

        for t in range(l):
            action = agent.act(state)

            # action 0: sit/hold -- reward stays 0 unless a sell below sets it
            next_state = env.get_state(t + 1)
            reward = 0

            if action == 1:  # buy
                # remember the purchase price and the time of the buy at step t
                env.buy(t)
                # print("Buy: " + formatPrice(data[t]))

            elif action == 2:  # sell
                reward, profit = env.sell(t)
                # print("Sell: " + formatPrice(data[t]) + " | Profit: " + formatPrice(profit))

            done = (t == l - 1)
            # Push all values to memory
            agent.memory.push(state, action, next_state, reward)
            state = next_state
            total_profit = env.net_profit(t)
            max_staked = env.max_spent

            if done:
                percent_return = total_profit / max_staked * 100
                print("--------------------------------")
                print("Total Profit: " + formatPrice(total_profit))
                print("Max staked: " + formatPrice(max_staked))
                print("Percent return: " + "{0:.2f}%".format(percent_return))
                print("--------------------------------")
                profits_list.append((total_profit, percent_return))
                # print(profits_list)
            agent.optimize()

        if e % config['save_freq'] == 0:
            agent.target_net.load_state_dict(agent.policy_net.state_dict())
            torch.save(agent.policy_net, config['policy_model'])
            torch.save(agent.target_net, config['target_model'])
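
The training loop uses a formatPrice helper that is not shown. In scripts of this lineage it is usually a one-line formatter along these lines (an assumed sketch, not necessarily this project's exact code):

def formatPrice(n):
    # render a P&L figure as a signed dollar amount, e.g. -12.5 -> "-$12.50"
    return ("-$" if n < 0 else "$") + "{0:.2f}".format(abs(n))
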
Code example #6
def DqnProgram(args, setResult, training_result):
    parser = argparse.ArgumentParser()
    parser.add_argument('-e',
                        '--episode',
                        type=int,
                        default=2000,
                        help='number of episode to run')
    parser.add_argument('-b',
                        '--batch_size',
                        type=int,
                        default=32,
                        help='batch size for experience replay')
    parser.add_argument('-i',
                        '--initial_invest',
                        type=int,
                        default=20000,
                        help='initial investment amount')
    parser.add_argument('-m',
                        '--mode',
                        type=str,
                        required=True,
                        help='either "train" or "test"')
    parser.add_argument('-w',
                        '--weights',
                        type=str,
                        help='a trained model weights')
    args = parser.parse_args(args)

    maybe_make_dir('weights')
    maybe_make_dir('portfolio_val')

    import time
    timestamp = time.strftime('%Y%m%d%H%M')
    data = get_data(mode=args.mode)  # TODO: hook this up to the ticker selected in the UI
    data = np.array([c['종가'] for c in data])  # '종가' = closing price

    env = TradingEnv(data, args.initial_invest)
    state_size = env.observation_space.shape
    action_size = env.action_space.n  # number of discrete actions
    agent = DQNAgent(state_size, action_size)
    scaler = get_scaler(env)

    portfolio_value = []

    if args.weights is not None:
        agent.load(args.weights)
        timestamp = re.findall(r'\d{12}', args.weights)[0]

    for e in range(args.episode):
        state = env.reset()
        state = scaler.transform([state])
        for t in range(env.n_step):
            action = agent.act(state)
            next_state, reward, done, info = env.step(action)
            next_state = scaler.transform([next_state])
            if args.mode == 'train':
                agent.remember(state, action, reward, next_state, done)
            state = next_state
            if done:
                msg = "episode: {}/{}, episode end value: {}".format(
                    e + 1, args.episode, info['cur_val'])
                print(msg)
                setResult(msg=msg)
                training_result.append(info['cur_val'])
                portfolio_value.append(
                    info['cur_val'])  # append episode end portfolio value
                break
            if args.mode == 'train' and len(agent.memory) > args.batch_size:
                agent.replay(args.batch_size)
        if args.mode == 'train' and (e + 1) % 10 == 0:  # checkpoint weights
            agent.save('weights/{}-dqn.h5'.format(timestamp))

    # save portfolio value history to disk
    with open('portfolio_val/{}-{}.p'.format(timestamp, args.mode),
              'wb') as fp:
        pickle.dump(portfolio_value, fp)
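
The pickled history written at the end can be read back later for plotting or comparison. A small usage sketch (the glob pattern simply matches the filename format used above):

import glob
import pickle

# load the most recently saved training history from portfolio_val/
latest = sorted(glob.glob('portfolio_val/*-train.p'))[-1]
with open(latest, 'rb') as fp:
    portfolio_value = pickle.load(fp)
print(len(portfolio_value), "episodes, final value:", portfolio_value[-1])
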
Code example #7
    timestamp = time.strftime('%Y%m%d%H%M')

    stocks = Stocks()
    portfolio = Portfolio()

    data = np.around(get_data(ticker=args.ticker))
    dataSplit = data.shape[1] - args.testingDays
    train_data = data[:, :dataSplit]
    test_data = data[:, dataSplit:]

    dates = get_dates(ticker=args.ticker)
    datesSplit = dates.shape[1] - args.testingDays
    train_dates = dates[:, :datesSplit]
    test_dates = dates[:, datesSplit:]

    env = TradingEnv(stocks, train_data, train_dates, args.initial_invest)
    state_size = env.observation_space.shape
    action_size = env.action_space.n
    agent = DQNAgent(state_size, action_size)
    scaler = get_scaler(env)

    portfolio_value = []

    if args.mode == 'test':
        # remake the env with test data
        env = TradingEnv(stocks, test_data, test_dates, args.initial_invest)
        # load trained weights
        agent.load(args.weights)
        # when testing, reuse the timestamp from when the weights were trained
        timestamp = re.findall(r'\d{12}', args.weights)[0]
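
Code example #7 assumes the price matrix and the dates matrix cover the same trading days, so dataSplit and datesSplit coincide. A quick sanity check before splitting can catch misaligned downloads (illustrative, not in the original):

    # prices and dates must be aligned column-for-column before splitting
    assert data.shape[1] == dates.shape[1], \
        "misaligned series: {} price columns vs {} date columns".format(
            data.shape[1], dates.shape[1])
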