def get_test_performance(epoch, modelFilename='model.h5', model=None):
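    """Evaluate a trained model over the test period: act greedily on its
    predictions, print each LONG/SHORT decision, and plot cumulative profit."""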

    import codecs
    import numpy as np
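    # MarketEnv, the model builders, bcolors, and plot_profit are assumed to be
    # imported at module level (as in the other snippets below).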
    codeListFilename = 'input_code.csv'

    codeMap = {}
    f = codecs.open(codeListFilename, "r", "utf-8")
    for line in f:
        if line.strip() != "":
            tokens = line.strip().split("\t") if "\t" in line else line.strip().split(",")
            codeMap[tokens[0]] = tokens[1]

    f.close()
    env = MarketEnv(dir_path="./If_index/",
                    target_codes=list(codeMap.keys()),
                    start_date="2015-05-29",
                    end_date="2016-08-25",
                    sudden_death=-1.0)
    target_close = env.get_close()

    from keras.optimizers import SGD
    if model is None and modelFilename == 'model_dqn.h5':
        model = MarketModelBuilder(modelFilename).getModel()
    elif model is None and modelFilename == 'model_pg.h5':
        model = MarketPolicyGradientModelBuilder(modelFilename).getModel()

    loss = 0.
    game_over = False
    # get initial input
    input_t = env.reset()
    cumReward = 0
    cum_profit = {}
    pre_action = {}
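    # run one full episode, always taking the greedy action from the Q-network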
    while not game_over:
        input_tm1 = input_t
        q = model.predict(input_tm1)
        action = np.argmax(q[0])
        input_t, reward, game_over, info = env.step(action)
        cumReward += reward
        cum_profit[info["dt"]] = cumReward
        if env.actions[action] == "LONG" or env.actions[action] == "SHORT":
            pre_action[info['dt']] = env.actions[action]
            color = bcolors.FAIL if env.actions[action] == "LONG" else bcolors.OKBLUE
            print("%s:\t%s\t%d\t%.2f\t%.2f\t" % (info["dt"],
                                                 color + env.actions[action] + bcolors.ENDC,
                                                 info['correct_action'], cumReward, info["cum"])
                  + "\t".join("%s:%.2f" % (l, i) for l, i in zip(env.actions, q[0].tolist())))

    print(len(cum_profit), len(target_close))
    plot_profit(cum_profit, target_close, pre_action, "test_" + str(epoch))
    return cum_profit, target_close
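
# bcolors and plot_profit are helper names used above but not shown in this
# snippet; a minimal sketch of bcolors covering only the ANSI codes used here
# (an assumption, not the original definition):
class bcolors:
    WARNING = '\033[93m'   # yellow
    FAIL = '\033[91m'      # red
    OKBLUE = '\033[94m'    # blue
    OKGREEN = '\033[92m'   # green
    ENDC = '\033[0m'       # reset color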
Example #2
                    target_codes=list(codeMap.keys()),
                    input_codes=[],
                    start_date="2013-08-26",
                    end_date="2015-08-25",
                    sudden_death=-1.0)

    # parameters
    epsilon = .5  # exploration
    min_epsilon = 0.1
    epoch = 100000
    max_memory = 5000
    batch_size = 128
    discount = 0.8

    from keras.optimizers import SGD
    model = MarketModelBuilder(modelFilename).getModel()
    sgd = SGD(lr=0.001, decay=1e-6, momentum=0.9, nesterov=True)
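    # note: this SGD optimizer is constructed but the model below is compiled
    # with 'rmsprop', so it is never actually used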
    model.compile(loss='mse', optimizer='rmsprop')

    # Initialize experience replay object
    exp_replay = ExperienceReplay(max_memory=max_memory, discount=discount)

    # Train
    win_cnt = 0
    for e in range(epoch):
        loss = 0.
        env.reset()
        game_over = False
        # get initial input
        input_t = env.reset()
        cumReward = 0
Example #3
from market_model_builder import MarketModelBuilder
from keras.optimizers import SGD
from environment import MarketEnv
import numpy as np

model_filename = "model.h5"
model = MarketModelBuilder(model_filename).getModel()
sgd = SGD(lr=0.001, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='mse', optimizer='rmsprop')

model.load_weights("model.h5")

env = MarketEnv(target_symbols=["1"],
                input_symbols=[],
                start_date="2015/04/27/",
                end_date="2018/04/27",
                sudden_death=-1.0)

data = env.get_data(symbol="1")
data_list = list(data.values())

prediction = []
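# walk forward through the series, building a 60-step window of the third
# column of each record (presumably the closing price) for every prediction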
for j in range(len(data_list)):
    if j < 59:
        continue

    state = []
    subject = []
    subject_vol = []
    for i in range(60):
        subject.append([data_list[j - i][2]])
Example #4
    f.close()

    env = MarketEnv(target_symbols=list(instruments.keys()), input_symbols=[], 
        start_date="2010-08-26", 
        end_date="2015-08-25", 
        sudden_death=-1.0)

    # parameters
    epsilon = 0.5  # exploration
    min_epsilon = 0.1
    epoch = 100000
    max_memory = 5000
    batch_size = 128
    discount = 0.8

    model = MarketModelBuilder(model_filename).getModel()
    sgd = SGD(lr=0.001, decay=1e-6, momentum=0.9, nesterov=True)
    model.compile(loss='mse', optimizer='rmsprop')

    # Initialize experience replay object
    exp_replay = ExperienceReplay(max_memory=max_memory, discount=discount)

    # Train
    win_cnt = 0
    for e in range(epoch):
        loss = 0.
        env._reset()
        game_over = False
        # get initial input
        input_t = env._reset()
        cumReward = 0
Example #5
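
# copy_model_weight and test_util are helpers not shown in this snippet; a
# minimal sketch of the weight copy, assuming the online and target networks
# share the same architecture:
def copy_model_weight(src_model, dst_model):
    # overwrite the target network's weights with the online network's weights
    dst_model.set_weights(src_model.get_weights())
    return dst_model
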
def train():
    import sys
    import codecs

    codeListFilename = sys.argv[1]
    modelFilename = sys.argv[2] if len(sys.argv) > 2 else None

    codeMap = {}
    f = codecs.open(codeListFilename, "r", "utf-8")

    for line in f:
        if line.strip() != "":
            tokens = line.strip().split("\t") if "\t" in line else line.strip().split(",")
            codeMap[tokens[0]] = tokens[1]

    f.close()

    env = MarketEnv(dir_path="./If_index/",
                    target_codes=list(codeMap.keys()),
                    start_date="2013-05-26",
                    end_date="2015-08-25",
                    sudden_death=-1.0)
    target_close = env.get_close()
    # parameters
    epsilon = .45  # exploration
    min_epsilon = 0.1
    epoch = 1000
    max_memory = 5000
    batch_size = 256
    discount = 0.9
    update_target_weight_step = 10
    from keras.optimizers import SGD
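    # build two networks: the online model is trained every step, while the
    # target model provides stable Q-value targets and is synced periodically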
    model = MarketModelBuilder(modelFilename).getModel()
    target_model = MarketModelBuilder(modelFilename).getModel()

    sgd = SGD(lr=0.001, decay=1e-6, momentum=0.9, nesterov=True)
    model.compile(loss='mse', optimizer='rmsprop')
    target_model.compile(loss='mse', optimizer='rmsprop')

    # Initialize experience replay object
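    # the buffer stores (state, action, reward, next_state) transitions and
    # builds discounted Q-learning targets when get_batch() is called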
    exp_replay = ExperienceReplay(max_memory=max_memory, discount=discount)

    # Train
    win_cnt = 0
    for e in range(epoch):
        loss = 0.
        env.reset()
        game_over = False
        # get initial input
        input_t = env.reset()
        cumReward = 0
        cum_profit = {}
        pre_action = {}

        iter_cnt = 0
        while not game_over:
            input_tm1 = input_t
            isRandom = False

            # get next action (epsilon-greedy): explore with probability
            # epsilon, otherwise act greedily on the Q-network's prediction
            if np.random.rand() <= epsilon:
                action = np.random.randint(0, env.action_space.n, size=1)[0]
                isRandom = True
            else:
                q = model.predict(input_tm1)
                action = np.argmax(q[0])
                if np.isnan(q).any():
                    print("NaN occurred in Q values!")
                    sys.exit()

            # apply action, get rewards and new state
            input_t, reward, game_over, info = env.step(action)
            cumReward += reward
            cum_profit[info["dt"]] = cumReward
            if env.actions[action] == "LONG" or env.actions[action] == "SHORT":
                color = bcolors.FAIL if env.actions[action] == "LONG" else bcolors.OKBLUE
                pre_action[info['dt']] = env.actions[action]
                if isRandom:
                    color = bcolors.WARNING if env.actions[action] == "LONG" else bcolors.OKGREEN
                print("%s:\t%s\t%d\t%.5f\t%.2f\t" % (info["dt"],
                                                     color + env.actions[action] + bcolors.ENDC,
                                                     info['correct_action'], cumReward, info["cum"])
                      + ("" if isRandom else "\t".join("%s:%.2f" % (l, i)
                                                       for l, i in zip(env.actions, q[0].tolist()))))

            # store experience
            exp_replay.remember([input_tm1, action, reward, input_t],
                                game_over)

            # adapt model
            if len(exp_replay.memory) >= batch_size:
                inputs, targets = exp_replay.get_batch(target_model,
                                                       batch_size=batch_size)
                loss += model.train_on_batch(inputs, targets)
            # sync the target network every update_target_weight_step iterations
            if iter_cnt % update_target_weight_step == 0:
                print("update target model weights")
                target_model = copy_model_weight(model, target_model)
            iter_cnt += 1

        if cumReward > 0 and game_over:
            win_cnt += 1
        test_util.plot_profit(cum_profit, target_close, pre_action,
                              "dqn_train_" + str(e))
        test_util.get_test_performance(e, 'model_dqn.h5', model)

        print("Epoch {:03d}/{} | Loss {:.4f} | Win count {} | Epsilon {:.4f}".
              format(e, epoch, loss, win_cnt, epsilon))
        # save trained model weights periodically; these are reused by the
        # visualization / test code
        if e % 5 == 0 and e != 0:
            model.save_weights(
                "model.h5" if modelFilename is None else modelFilename,
                overwrite=True)
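        # decay the exploration rate, but never below min_epsilon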
        epsilon = max(min_epsilon, epsilon * 0.99)

Example #6

    env = MarketEnv(target_symbols=list(instruments.keys()),
                    input_symbols=[],
                    start_date="2015/04/27",
                    end_date="2018/04/27",
                    sudden_death=-1.0)

    # parameters
    epsilon = 0.5  # exploration
    min_epsilon = 0.1
    epoch = 100000
    max_memory = 5000
    batch_size = 128
    discount = 0.8

    model = MarketModelBuilder(model_filename).getModel()
    model.summary()
    sgd = SGD(lr=0.001, decay=1e-6, momentum=0.9, nesterov=True)
    model.compile(loss='mse', optimizer='rmsprop')
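    # initialize from previously saved weights before continuing training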
    model.load_weights("model.h5")

    # Initialize experience replay object
    exp_replay = ExperienceReplay(max_memory=max_memory, discount=discount)

    # Train
    win_cnt = 0
    for e in range(epoch):
        loss = 0.
        env.reset()
        game_over = False
        # get initial input