Example #1
import argparse

import ray
from tensorboard import program

# Coordinator is the project's own training coordinator; the import path is
# assumed from the surrounding project layout
from coordinator import Coordinator


def main():
    # argument parser to specify hyperparameters
    parser = argparse.ArgumentParser()
    parser.add_argument("--train", help="training run", action="store_true")
    parser.add_argument("--test", help="test run", action="store_true")
    parser.add_argument("--network_type",
                        default="mlp",
                        help="""type of the policy network ["mlp", "gru"]""",
                        type=str)
    parser.add_argument(
        "--num_agents",
        default=8,
        help="number of environments and agents running in parallel",
        type=int)
    parser.add_argument(
        "--num_steps",
        default=32,
        help="number of steps on each environment for every update",
        type=int)
    parser.add_argument("--environment",
                        default="LunarLanderContinuous-v2",
                        help="gym environment type",
                        type=str)
    args = parser.parse_args()

    # Launch tensorboard
    tb = program.TensorBoard()
    tb.configure(argv=[None, '--logdir', './logs'])
    tb.launch()

    # Initialize Ray with explicit memory limits (~100 MB heap, ~1 GB object store)
    ray.init(memory=1024 * 512 * 200, object_store_memory=1024 * 1024 * 1000)

    if args.train:
        # start training run with given hyperparameters
        coord = Coordinator(num_agents=args.num_agents,
                            network=args.network_type,
                            env_name=args.environment,
                            num_steps=args.num_steps)
        coord.train()

    if args.test:
        # start run with latest model checkpoint
        from test import test_run
        test_run(network=args.network_type, environment=args.environment)

    ray.shutdown()


# run main() when the file is executed as a script
if __name__ == "__main__":
    main()
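Assuming the script above is saved as a (hypothetical) main.py, a training run could be started with, for example, python main.py --train --network_type gru --num_agents 4, and the latest checkpoint evaluated with python main.py --test; the flags correspond directly to the argparse arguments defined in main().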
Example #2
import os
import sys

import numpy as np

# make the project root importable before pulling in the local modules
current_path = os.getcwd()
sys.path.append(current_path)
from hypertune import start_commander, start_workers
from coordinator import Coordinator
from config import mode, number_workers, tuned_config, fix_random_seed

# optionally fix the random seed for reproducible runs
if fix_random_seed:
    np.random.seed(123)

# %matplotlib inline

if mode == 'parallel':
    # distributed hyperparameter tuning: launch a commander and several workers
    start_commander()
    workers = start_workers(number_workers)
else:
    # single training run followed by a back-test with the tuned configuration
    model = Coordinator(tuned_config, '2900')
    #################### to do ####################
    # model = Coordinator(tuned_config, '-5.28')
    # model.restore_price_predictor('-5.28-80000-')
    ##############################################
    model.train('single', True)
    model.back_test('test', 2500, True)
Example #3
import numpy as np

# project-local imports; the exact module paths are assumed from the project layout
from coordinator import Coordinator
from environment import PortfolioEnv

# `agent`, `df_train`, `window_length`, `total_training_step` and `replay_period`
# are assumed to be defined earlier in the original script
coo = Coordinator(agent)

# training environment built on the training data frame
env = PortfolioEnv(df_train,
                   steps=256,
                   trading_cost=0.00007,
                   window_length=window_length,
                   scale=False,
                   random_reset=False)

# sanity check: print the action values for a few environment steps
ob = env.reset()
for i in range(5):
    print(coo.action_values(ob))
    ob, a, r, ob_ = env.step(np.ones(5))

# train the agent, logging progress to TensorBoard
coo.train(env,
          total_training_step=total_training_step,
          replay_period=replay_period,
          tensorboard=True)

# evaluation environment: longer horizon and no trading cost
env_test = PortfolioEnv(df_train,
                        steps=2500,
                        trading_cost=0.0,
                        window_length=window_length,
                        scale=False,
                        random_reset=False)
# evaluate the learned action values on the test environment
ob = env_test.reset()
for i in range(10):
    print(coo.action_values(ob))
    print(np.argmax(coo.action_values(ob)))
    ob, a, r, ob_ = env_test.step(np.ones(5))

# coo.restore('')

# a second, standalone variant of the same workflow
import os
import sys

current_path = os.getcwd()
sys.path.append(current_path)
from coordinator import Coordinator
from config import tuned_config

model = Coordinator(tuned_config, 'name')
model.train()
# restore a saved checkpoint by name ('name-step' is a placeholder)
model.restore('name-step')