Example 1
def main(env_id, train, logdir, exploration, env_dt, **kwargs):
    sess = tf.InteractiveSession()  # start a TensorFlow session
    env = gym.make(env_id)

    u_bound = (env.action_space.low, env.action_space.high)
    sig = inspect.signature(DDPG)
    ddpg_kwargs = dict()
    for key in sig.parameters:
        if key in kwargs:
            ddpg_kwargs[key] = kwargs[key]
            kwargs.pop(key)

    action_dim = env.action_space.shape[0]  # dimension of the action space
    state_dim = env.observation_space.shape[0]  # dimension of the state space
    noise = U.OUNoise(action_dim, 0.0, 0.15, 0.2, 0.05, exploration,
                      env_dt)  # Ornstein-Uhlenbeck exploration noise
    print(noise)
    agent = DDPG(sess,
                 state_dim,
                 action_dim,
                 u_bound=u_bound,
                 noise=noise,
                 **ddpg_kwargs)  # creating a ddpg agent

    play = U.Play(sess, env, agent, logdir)  # utility for running the agent in the environment

    if train:
        play.train(kwargs['nb_episodes'],
                   kwargs['nb_eval_episodes'])  # training

    play.run_env(train=False)
    env.close()
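The kwargs handling above is a small reusable idiom: inspect.signature(DDPG) exposes the constructor's parameter names, so only matching keys are forwarded to the agent while the rest (e.g. nb_episodes) stay in kwargs for the training loop. Below is a minimal, self-contained sketch of the same idiom; split_kwargs and Agent are illustrative names, not part of the example's codebase.

import inspect

def split_kwargs(cls, kwargs):
    """Split kwargs into those accepted by cls.__init__ and the leftovers."""
    accepted = {k: kwargs.pop(k) for k in list(kwargs)
                if k in inspect.signature(cls).parameters}
    return accepted, kwargs

class Agent:
    def __init__(self, gamma=0.99, tau=0.001):
        self.gamma, self.tau = gamma, tau

agent_kwargs, rest = split_kwargs(Agent, {'gamma': 0.9, 'nb_episodes': 100})
agent = Agent(**agent_kwargs)        # gamma=0.9, tau left at its default
assert rest == {'nb_episodes': 100}  # left over for the caller (e.g. play.train)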
Example 2
 def get_agent(self):
     agent_type = self.cfg["Agent"]["Type"]
     mode = self.cfg["Agent"]["Setup"]["mode"]
     if agent_type == "DDPG":
         return DDPG(self.action_mode, self.obs_config, self.task_class,
                     self.cfg)
     elif agent_type == "TD3":
         return TD3(self.action_mode, self.obs_config, self.task_class,
                    self.cfg)
     elif agent_type == "OpenAIES":
         # We use DDPG's validation method because it is faster
         if mode == "validation_mult":
             return DDPG(self.action_mode, self.obs_config, self.task_class,
                         self.cfg)
         else:
             return OpenAIES(self.action_mode, self.obs_config,
                             self.task_class, self.cfg)
     else:
         raise ValueError(
             "%s is not a supported agent type. Please check your config-file."
             % agent_type)
def session(config, mode):
    from data.environment import Environment
    codes, start_date, end_date, features, agent_config, market, predictor, \
        framework, window_length, noise_flag, record_flag, plot_flag, \
        reload_flag, trainable, method = parse_config(config, mode)
    env = Environment(start_date, end_date, codes, features, int(window_length), market)

    global M
    M = len(codes) + 1

    if framework == 'DDPG':
        print("*-----------------Loading DDPG Agent---------------------*")
        from agents.ddpg import DDPG
        agent = DDPG(predictor, len(codes) + 1, int(window_length), len(features),
                     '-'.join(agent_config), reload_flag, trainable)

    elif framework == 'PPO':
        print("*-----------------Loading PPO Agent---------------------*")
        from agents.ppo import PPO
        agent = PPO(predictor, len(codes) + 1, int(window_length), len(features),
                    '-'.join(agent_config), reload_flag, trainable)

    stocktrader = StockTrader()

    if mode == 'train':
        # NOTE: `epochs` is not returned by parse_config in this snippet; it is
        # expected to be defined at module level.
        print("Training for {:d} epochs".format(epochs))
        for epoch in range(epochs):
            print("Now we are at epoch", epoch)
            traversal(stocktrader, agent, env, epoch, noise_flag, framework, method, trainable)

            if record_flag == 'True':
                stocktrader.write(epoch)

            if plot_flag == 'True':
                stocktrader.plot_result()

            stocktrader.print_result(epoch, agent)
            stocktrader.reset()

    elif mode == 'test':
        traversal(stocktrader, agent, env, 1, noise_flag, framework, method, trainable)
        stocktrader.write(1)
        stocktrader.plot_result()
        stocktrader.print_result(1, agent)
Example 4
def main(env_id, train, logdir, exploration, env_dt, **kwargs):
    sess = tf.InteractiveSession()
    env = gym.make(env_id)

    u_bound = (env.action_space.low, env.action_space.high)
    sig = inspect.signature(DDPG)
    ddpg_kwargs = dict()
    for key in sig.parameters:
        if key in kwargs:
            ddpg_kwargs[key] = kwargs[key]
            kwargs.pop(key)

    action_dim = env.action_space.shape[0]
    state_dim = env.observation_space.shape[0]
    noise = U.OUNoise(action_dim, 0.0, 0.15, 0.2, 0.05, exploration, env_dt)
    agent = DDPG(sess, state_dim, action_dim, u_bound=u_bound, noise=noise, **ddpg_kwargs)

    play = U.Play(sess, env, agent, logdir)

    if train:
        play.train(kwargs['nb_episodes'], kwargs['nb_eval_episodes'])

    play.run_env(train=False)
    env.close()
def session(config, args):
    global PATH_prefix

    codes, start_date, end_date, features, agent_config, market, predictor, \
        framework, window_length, noise_flag, record_flag, plot_flag, \
        reload_flag, trainable, method = parse_config(config, args)
    env = Environment()

    global M
    M = codes + 1

    stocktrader = StockTrader()
    PATH_prefix = "result/DDPG/" + str(args['num']) + '/'

    if args['mode'] == 'train':
        if not os.path.exists(PATH_prefix):
            os.makedirs(PATH_prefix)
            train_start_date, train_end_date, test_start_date, test_end_date, codes = env.get_repo(
                start_date, end_date, codes, market)
            env.get_data(train_start_date, train_end_date, features,
                         window_length, market, codes)
            print("Codes:", codes)
            print('Training Time Period:', train_start_date, '   ',
                  train_end_date)
            print('Testing Time Period:', test_start_date, '   ',
                  test_end_date)
            with open(PATH_prefix + 'config.json', 'w') as f:
                json.dump(
                    {
                        "train_start_date":
                        train_start_date.strftime('%Y-%m-%d'),
                        "train_end_date": train_end_date.strftime('%Y-%m-%d'),
                        "test_start_date":
                        test_start_date.strftime('%Y-%m-%d'),
                        "test_end_date": test_end_date.strftime('%Y-%m-%d'),
                        "codes": codes
                    }, f)
                print("finish writing config")
        else:
            with open("result/DDPG/" + str(args['num']) + '/config.json',
                      'r') as f:
                dict_data = json.load(f)
                print("successfully load config")
            train_start_date, train_end_date, codes = datetime.datetime.strptime(
                dict_data['train_start_date'],
                '%Y-%m-%d'), datetime.datetime.strptime(
                    dict_data['train_end_date'],
                    '%Y-%m-%d'), dict_data['codes']
            env.get_data(train_start_date, train_end_date, features,
                         window_length, market, codes)

        # Use ['False', 'True'] to train agents both with and without noise in asset prices.
        for noise_flag in ['True']:

            print("*-----------------Loading DDPG Agent---------------------*")
            agent = DDPG(predictor,
                         len(codes) + 1, int(window_length), len(features),
                         '-'.join(agent_config), reload_flag, trainable)

            print("Training with {:d}".format(epochs))
            for epoch in range(epochs):
                print("Now we are at epoch", epoch)
                traversal(stocktrader, agent, env, epoch, noise_flag,
                          framework, method, trainable)

                if record_flag == 'True':
                    stocktrader.write(epoch, framework)

                if plot_flag == 'True':
                    stocktrader.plot_result()

                agent.reset_buffer()
                stocktrader.print_result(epoch, agent, noise_flag)
                stocktrader.reset()
            agent.close()
            del agent

    elif args['mode'] == 'test':
        with open(PATH_prefix + 'config.json', 'r') as f:
            dict_data = json.load(f)
        test_start_date, test_end_date, codes = datetime.datetime.strptime(
            dict_data['test_start_date'],
            '%Y-%m-%d'), datetime.datetime.strptime(
                dict_data['test_end_date'], '%Y-%m-%d'), dict_data['codes']
        env.get_data(test_start_date, test_end_date, features, window_length,
                     market, codes)

        backtest([
            DDPG(predictor,
                 len(codes) + 1, int(window_length), len(features),
                 '-'.join(agent_config), "True", "False")
        ], env)
Example 6
        from agents.reinforce import REINFORCE
        agent = REINFORCE(env, model, buffer, logger, args)
    elif args.algo == "vpg":
        from agents.vpg import VPG
        agent = VPG(env, model, buffer, logger, args)
    elif args.algo == "ppo":
        from agents.ppo import PPO
        agent = PPO(env, model, buffer, logger, args)
elif args.algo in args.q_learning:
    if isinstance(env.action_space, Box):
        # Action limit for clamping
        # Critically: assumes all dimensions share the same bound!
        args.act_limit = env.action_space.high[0]
    if args.algo == "ddpg":
        from agents.ddpg import DDPG
        agent = DDPG(env, model, buffer, logger, args)
    elif args.algo == "td3":
        from agents.td3 import TD3
        agent = TD3(env, model, buffer, logger, args)
    elif args.algo == "sac":
        from agents.sac import SAC
        agent = SAC(env, model, buffer, logger, args)
else:
    algos = tuple(args.policy_gradient + args.q_learning)
    raise NotImplementedError(f"Expected `algo` argument to be one of " +
                              f"{algos}, but got '{args.algo}'.")

"""
    Train
"""
# Train the agent!
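The q-learning branch above collapses the action bound to a single scalar, env.action_space.high[0], which, as its comment warns, assumes every action dimension shares the same symmetric limit; Example 10 makes the same assumption explicit with an assert. A hedged sketch of a sanity check combining both conditions (the helper name is illustrative, not from the repository):

import numpy as np
from gym.spaces import Box

def scalar_act_limit(action_space: Box) -> float:
    """Return a single clamping bound, failing loudly if the assumption breaks."""
    high = np.asarray(action_space.high, dtype=np.float64)
    low = np.asarray(action_space.low, dtype=np.float64)
    assert np.allclose(high, high[0]), "action dimensions have different bounds"
    assert np.allclose(low, -high), "action bounds are not symmetric"
    return float(high[0])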
Example 7
import rospy
import matplotlib.pyplot as plt

save = 0
current_path = os.getcwd()
t_listener = TorqueListener()
env = KomodoEnvironment()
state_shape = env.state_shape
action_shape = env.action_shape

model = 'ddpg'
if model == 'ddpg':
    agent = DDPG(state_shape,
                 action_shape,
                 batch_size=128,
                 gamma=0.995,
                 tau=0.001,
                 actor_lr=0.0005,
                 critic_lr=0.001,
                 use_layer_norm=True)
    print('DDPG agent configured')
    agent.load_model(agent.current_path + '/model/model.ckpt')
elif model == 'a2c':
    agent = A2C(state_shape,
                action_shape,
                gamma=0.995,
                actor_lr=0.0002,
                critic_lr=0.001,
                use_layer_norm=True)
    print('A2C agent configured')
    agent.load_model(agent.current_path + '/model_a2c/model.ckpt')
Example 8
 def _get_action(self, o):
     # Get actions just like DDPG
     return DDPG._get_action(self, o)
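DDPG._get_action(self, o) is the unbound-call way of reusing the parent implementation from a subclass (Example 11 does the same with _update_networks). A minimal sketch of the pattern, assuming the enclosing class derives from DDPG; TD3 here is only illustrative, and super()._get_action(o) is the more idiomatic equivalent.

class TD3(DDPG):
    def _get_action(self, o):
        # Reuse DDPG's action selection unchanged.
        return DDPG._get_action(self, o)  # equivalently: super()._get_action(o)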
Example 9
import matplotlib.pyplot as plt

HALF_KOMODO = 0.53 / 2
np.set_printoptions(precision=1)
current_path = os.getcwd()
env = KomodoEnvironment()
state_shape = env.state_shape
action_shape = env.action_shape

model = 'a2c'

if model == 'ddpg':
    agent = DDPG(state_shape,
                 action_shape,
                 batch_size=128,
                 gamma=0.995,
                 tau=0.001,
                 actor_lr=0.0001,
                 critic_lr=0.001,
                 use_layer_norm=True)
    print('DDPG agent configured')
elif model == 'a2c':
    agent = A2C(state_shape,
                action_shape,
                gamma=0.995,
                actor_lr=0.0001,
                critic_lr=0.001,
                use_layer_norm=True)
    print('A2C agent configured')

max_episode = 1000
tot_rewards = []
Example 10
def train(env,
          nb_epochs,
          nb_epoch_cycles,
          render_eval,
          reward_scale,
          render,
          param_noise,
          actor,
          critic,
          normalize_returns,
          normalize_observations,
          critic_l2_reg,
          actor_lr,
          critic_lr,
          action_noise,
          popart,
          gamma,
          clip_norm,
          nb_train_steps,
          nb_rollout_steps,
          nb_eval_steps,
          batch_size,
          memory,
          tau=0.01,
          eval_env=None,
          param_noise_adaption_interval=50):
    rank = MPI.COMM_WORLD.Get_rank()

    assert (np.abs(env.action_space.low) == env.action_space.high).all()  # we assume symmetric actions
    max_action = env.action_space.high
    logger.info(
        'scaling actions by {} before executing in env'.format(max_action))
    agent = DDPG(actor,
                 critic,
                 memory,
                 env.observation_space.shape,
                 env.action_space.shape,
                 gamma=gamma,
                 tau=tau,
                 normalize_returns=normalize_returns,
                 normalize_observations=normalize_observations,
                 batch_size=batch_size,
                 action_noise=action_noise,
                 param_noise=param_noise,
                 critic_l2_reg=critic_l2_reg,
                 actor_lr=actor_lr,
                 critic_lr=critic_lr,
                 enable_popart=popart,
                 clip_norm=clip_norm,
                 reward_scale=reward_scale)
    logger.info('Using agent with the following configuration:')
    logger.info(str(agent.__dict__.items()))

    # Set up logging stuff only for a single worker.
    if rank == 0:
        saver = tf.train.Saver()
    else:
        saver = None

    step = 0
    episode = 0
    eval_episode_rewards_history = deque(maxlen=100)
    episode_rewards_history = deque(maxlen=100)
    with U.single_threaded_session() as sess:
        # Prepare everything.
        agent.initialize(sess)
        sess.graph.finalize()

        agent.reset()
        obs = env.reset()
        if eval_env is not None:
            eval_obs = eval_env.reset()
        done = False
        episode_reward = 0.
        episode_step = 0
        episodes = 0
        t = 0

        epoch = 0
        start_time = time.time()

        epoch_episode_rewards = []
        epoch_episode_steps = []
        epoch_episode_eval_rewards = []
        epoch_episode_eval_steps = []
        epoch_start_time = time.time()
        epoch_actions = []
        epoch_qs = []
        epoch_episodes = 0
        for epoch in range(nb_epochs):
            for cycle in range(nb_epoch_cycles):
                # Perform rollouts.
                for t_rollout in range(nb_rollout_steps):
                    # Predict next action.
                    action, q = agent.pi(obs, apply_noise=True, compute_Q=True)
                    # print(action.shape)
                    # print(env.action_space.shape)
                    assert action.shape == env.action_space.shape

                    # Execute next action.
                    if rank == 0 and render:
                        env.render()
                    assert max_action.shape == action.shape
                    new_obs, r, done, info = env.step(
                        max_action * action
                    )  # scale for execution in env (as far as DDPG is concerned, every action is in [-1, 1])
                    t += 1
                    if rank == 0 and render:
                        env.render()
                    episode_reward += r
                    episode_step += 1

                    # Book-keeping.
                    epoch_actions.append(action)
                    epoch_qs.append(q)
                    agent.store_transition(obs, action, r, new_obs, done)
                    obs = new_obs

                    if done:
                        # Episode done.
                        epoch_episode_rewards.append(episode_reward)
                        episode_rewards_history.append(episode_reward)
                        epoch_episode_steps.append(episode_step)
                        episode_reward = 0.
                        episode_step = 0
                        epoch_episodes += 1
                        episodes += 1

                        agent.reset()
                        obs = env.reset()

                # Train.
                epoch_actor_losses = []
                epoch_critic_losses = []
                epoch_adaptive_distances = []
                for t_train in range(nb_train_steps):
                    # Adapt param noise, if necessary.
                    if memory.nb_entries >= batch_size and t_train % param_noise_adaption_interval == 0:
                        distance = agent.adapt_param_noise()
                        epoch_adaptive_distances.append(distance)

                    cl, al = agent.train()
                    epoch_critic_losses.append(cl)
                    epoch_actor_losses.append(al)
                    agent.update_target_net()

                # Evaluate.
                eval_episode_rewards = []
                eval_qs = []
                if eval_env is not None:
                    eval_episode_reward = 0.
                    for t_rollout in range(nb_eval_steps):
                        eval_action, eval_q = agent.pi(eval_obs,
                                                       apply_noise=False,
                                                       compute_Q=True)
                        eval_obs, eval_r, eval_done, eval_info = eval_env.step(
                            max_action * eval_action
                        )  # scale for execution in env (as far as DDPG is concerned, every action is in [-1, 1])
                        if render_eval:
                            eval_env.render()
                        eval_episode_reward += eval_r

                        eval_qs.append(eval_q)
                        if eval_done:
                            eval_obs = eval_env.reset()
                            eval_episode_rewards.append(eval_episode_reward)
                            eval_episode_rewards_history.append(
                                eval_episode_reward)
                            eval_episode_reward = 0.

            mpi_size = MPI.COMM_WORLD.Get_size()
            # Log stats.
            # XXX shouldn't call np.mean on variable length lists
            duration = time.time() - start_time
            stats = agent.get_stats()
            combined_stats = stats.copy()
            combined_stats['rollout/return'] = np.mean(epoch_episode_rewards)
            combined_stats['rollout/return_history'] = np.mean(
                episode_rewards_history)
            combined_stats['rollout/episode_steps'] = np.mean(
                epoch_episode_steps)
            combined_stats['rollout/actions_mean'] = np.mean(epoch_actions)
            combined_stats['rollout/Q_mean'] = np.mean(epoch_qs)
            combined_stats['train/loss_actor'] = np.mean(epoch_actor_losses)
            combined_stats['train/loss_critic'] = np.mean(epoch_critic_losses)
            combined_stats['train/param_noise_distance'] = np.mean(
                epoch_adaptive_distances)
            combined_stats['total/duration'] = duration
            combined_stats['total/steps_per_second'] = float(t) / float(
                duration)
            combined_stats['total/episodes'] = episodes
            combined_stats['rollout/episodes'] = epoch_episodes
            combined_stats['rollout/actions_std'] = np.std(epoch_actions)
            # Evaluation statistics.
            if eval_env is not None:
                combined_stats['eval/return'] = np.mean(eval_episode_rewards)
                combined_stats['eval/return_history'] = np.mean(
                    eval_episode_rewards_history)
                # combined_stats['eval/Q'] = eval_qs
                combined_stats['eval/episodes'] = len(eval_episode_rewards)

            def as_scalar(x):
                if isinstance(x, np.ndarray):
                    assert x.size == 1
                    return x[0]
                elif np.isscalar(x):
                    return x
                else:
                    raise ValueError('expected scalar, got %s' % x)

            combined_stats_sums = MPI.COMM_WORLD.allreduce(
                np.array([as_scalar(x) for x in combined_stats.values()]))
            combined_stats = {
                k: v / mpi_size
                for (k, v) in zip(combined_stats.keys(), combined_stats_sums)
            }

            # Total statistics.
            combined_stats['total/epochs'] = epoch + 1
            combined_stats['total/steps'] = t

            for key in sorted(combined_stats.keys()):
                logger.record_tabular(key, combined_stats[key])
            logger.dump_tabular()
            logger.info('')
            logdir = logger.get_dir()
            if rank == 0 and logdir:
                if hasattr(env, 'get_state'):
                    with open(os.path.join(logdir, 'env_state.pkl'),
                              'wb') as f:
                        pickle.dump(env.get_state(), f)
                if eval_env and hasattr(eval_env, 'get_state'):
                    with open(os.path.join(logdir, 'eval_env_state.pkl'),
                              'wb') as f:
                        pickle.dump(eval_env.get_state(), f)
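At the end of each epoch the stats dictionary is averaged across MPI workers: every value is reduced to a scalar, summed with allreduce, and divided by the world size. A stripped-down sketch of that reduction in isolation, assuming mpi4py and a script launched under mpirun; the helper name is illustrative.

import numpy as np
from mpi4py import MPI

def mpi_average_stats(stats):
    """Average a dict of scalar stats over all MPI workers."""
    comm = MPI.COMM_WORLD
    local = np.array([float(v) for v in stats.values()])
    total = comm.allreduce(local)  # element-wise sum across workers (default op is SUM)
    return {k: v / comm.Get_size() for k, v in zip(stats.keys(), total)}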
Example 11
 def _update_networks(self):
     # Update networks just like DDPG (but overloading the functions below)
     return DDPG._update_networks(self)
 def __init__(self,
              state_size,
              action_size,
              seed,
              num_agents,
              memory,
              ActorNetwork,
              CriticNetwork,
              device,
              BOOTSTRAP_SIZE=5,
              GAMMA=0.99,
              TAU=1e-3,
              LR_CRITIC=5e-4,
              LR_ACTOR=5e-4,
              UPDATE_EVERY=1,
              TRANSFER_EVERY=2,
              UPDATE_LOOP=10,
              ADD_NOISE_EVERY=5,
              WEIGHT_DECAY=0,
              FILE_NAME="multi_ddpg"):
     """Initialize an Agent object.
     
     Params
     ======
         state_size (int): dimension of each state
         action_size (int): dimension of each action
         seed (int): random seed
         num_agents: number of running agents
         memory: instance of ReplayBuffer
         ActorNetwork: a class inheriting from torch.nn.Module that defines the structure of the actor neural network
         CriticNetwork: a class inheriting from torch.nn.Module that defines the structure of the critic neural network
         device: cpu or cuda:0 if available
         BOOTSTRAP_SIZE: length of the bootstrap
         GAMMA: discount factor
         TAU: for soft update of target parameters
         LR_CRITIC: learning rate of the critics
         LR_ACTOR: learning rate of the actors
         UPDATE_EVERY: how often to update the networks
         TRANSFER_EVERY: after how many updates to transfer weights from the online networks to the fixed target networks
         UPDATE_LOOP: number of learning iterations performed whenever the agent learns
         ADD_NOISE_EVERY: how often to add noise to favor exploration
         WEIGHT_DECAY: Parameter of the Adam Optimizer of the Critic Network
         FILE_NAME: default prefix to the saved model
     """
      # Instantiate n agents, each with its own networks
     self.agents = [
         DDPG(state_size,
              action_size,
              seed,
              memory,
              ActorNetwork,
              CriticNetwork,
              device,
              BOOTSTRAP_SIZE,
              GAMMA,
              TAU,
              LR_CRITIC,
              LR_ACTOR,
              UPDATE_EVERY,
              TRANSFER_EVERY,
              UPDATE_LOOP,
              ADD_NOISE_EVERY,
              WEIGHT_DECAY,
              FILE_NAME=FILE_NAME + "_" + str(i)) for i in range(num_agents)
     ]
     self.rewards = [
         deque(maxlen=BOOTSTRAP_SIZE) for i in range(num_agents)
     ]
     self.states = [deque(maxlen=BOOTSTRAP_SIZE) for i in range(num_agents)]
     self.gammas = np.array([GAMMA**i for i in range(BOOTSTRAP_SIZE)])
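The wrapper keeps a reward deque of length BOOTSTRAP_SIZE per agent and precomputes the discount vector gammas, which points to n-step (bootstrapped) returns. A small illustration of how such a discounted sum could be formed, assuming that intent; the snippet is not taken from the class above.

import numpy as np
from collections import deque

BOOTSTRAP_SIZE, GAMMA = 5, 0.99
gammas = np.array([GAMMA ** i for i in range(BOOTSTRAP_SIZE)])

rewards = deque(maxlen=BOOTSTRAP_SIZE)
for r in [1.0, 0.0, 0.5, 0.0, 1.0]:
    rewards.append(r)

# Discounted n-step sum: r_t + gamma*r_{t+1} + ... + gamma^(n-1)*r_{t+n-1}
n_step_return = float(np.dot(gammas, np.asarray(rewards)))
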
def session(config, args):
    global PATH_prefix
    from data.environment import Environment
    codes, start_date, end_date, features, agent_config, market, predictor, framework, window_length, noise_flag, record_flag, plot_flag, reload_flag, trainable, method = parse_config(
        config, args)
    env = Environment()

    global M
    if market == 'China':
        M = codes + 1
    else:
        M = len(codes) + 1
#     print("len codes",len(codes))
#     M=codes+1
# M = number of stocks -> affects the noise; for details see: from agents.ornstein_uhlenbeck import OrnsteinUhlenbeckActionNoise

# if framework == 'DDPG':
#     print("*-----------------Loading DDPG Agent---------------------*")
#     from agents.ddpg import DDPG
#     agent = DDPG(predictor, len(codes) + 1, int(window_length), len(features), '-'.join(agent_config), reload_flag,trainable)
#
# elif framework == 'PPO':
#     print("*-----------------Loading PPO Agent---------------------*")
#     from agents.ppo import PPO
#     agent = PPO(predictor, len(codes) + 1, int(window_length), len(features), '-'.join(agent_config), reload_flag,trainable)

    stocktrader = StockTrader()
    PATH_prefix = "./result_new/PG/" + str(args['num']) + '/'  #<-

    if args['mode'] == 'train':
        if not os.path.exists(PATH_prefix):
            print('Create new path at', PATH_prefix)
            os.makedirs(PATH_prefix)
            if market == "China":
                train_start_date, train_end_date, test_start_date, test_end_date, codes = env.get_repo(
                    start_date, end_date, codes, market)
            else:
                train_start_date, train_end_date, test_start_date, test_end_date, codes = env.get_repo(
                    start_date, end_date, len(codes), market)

            env.get_data(train_start_date, train_end_date, features,
                         window_length, market, codes)

            print("Codes:", codes)
            print('Training Time Period:', train_start_date, '   ',
                  train_end_date)
            print('Testing Time Period:', test_start_date, '   ',
                  test_end_date)
            with open(PATH_prefix + 'config.json', 'w') as f:
                json.dump(
                    {
                        "train_start_date":
                        train_start_date.strftime('%Y-%m-%d'),
                        "train_end_date": train_end_date.strftime('%Y-%m-%d'),
                        "test_start_date":
                        test_start_date.strftime('%Y-%m-%d'),
                        "test_end_date": test_end_date.strftime('%Y-%m-%d'),
                        "codes": codes
                    }, f)
                print("finish writing config")

        else:
            with open("./result_new/PG/" + str(args['num']) + '/config.json',
                      'r') as f:
                dict_data = json.load(f)
                print("successfully load config")

            if market == "China":
                train_start_date, train_end_date, test_start_date, test_end_date, codes = env.get_repo(
                    start_date, end_date, codes, market)
            else:
                train_start_date, train_end_date, test_start_date, test_end_date, codes = env.get_repo(
                    start_date, end_date, len(codes), market)

            env.get_data(train_start_date, train_end_date, features,
                         window_length, market, codes)


#             train_start_date, train_end_date, codes = datetime.datetime.strptime(dict_data['train_start_date'],                                                                               '%Y-%m-%d'), datetime.datetime.strptime(dict_data['train_end_date'], '%Y-%m-%d'), dict_data['codes']

#             env.get_data(train_start_date, train_end_date, features, window_length, market, codes)

        # Use ['False', 'True'] to train agents both with and without noise in asset prices.
        for noise_flag in ['True']:

            if framework == 'PG':
                print("*-----------------Loading PG Agent---------------------*")
                agent = PG(
                    len(codes) + 1, int(window_length), len(features),
                    '-'.join(agent_config), reload_flag, trainable, noise_flag,
                    args['num'])
                print("Finish import {}".format(agent.name))

            elif framework == 'DDPG':
                print("*-----------------Loading DDPG Agent---------------------*")
                from agents.ddpg import DDPG
                agent = DDPG(predictor,
                             len(codes) + 1, int(window_length), len(features),
                             '-'.join(agent_config), reload_flag, trainable)
                print("Finish import {}".format(agent.name))

            print("Training with {:d}".format(epochs))
            for epoch in range(epochs):
                print("Now we are at epoch", epoch)
                traversal(stocktrader, agent, env, epoch, noise_flag,
                          framework, method, trainable)
                if record_flag == 'True':
                    stocktrader.write(epoch, framework)
                if plot_flag == 'True':
                    stocktrader.plot_result()
                #print(agent)
                agent.reset_buffer()
                stocktrader.print_result(epoch, agent, noise_flag)
                stocktrader.reset()
            agent.close()
            del agent

    #######
    # TESTING

    elif args['mode'] == 'test':
        with open("./result_new/PG/" + str(args['num']) + '/config.json',
                  'r') as f:
            dict_data = json.load(f)
        test_start_date, test_end_date, codes = datetime.datetime.strptime(
            dict_data['test_start_date'],
            '%Y-%m-%d'), datetime.datetime.strptime(
                dict_data['test_end_date'], '%Y-%m-%d'), dict_data['codes']
        env.get_data(test_start_date, test_end_date, features, window_length,
                     market, codes)
        backtest([
            PG(
                len(codes) + 1, int(window_length), len(features),
                '-'.join(agent_config), 'True', 'False', 'True', args['num'])
        ], env, market)
Example 14
def session(config, args):
    codes, start_date, end_date, features, agent_config, \
        market, predictor, framework, window_length, noise_flag, record_flag, \
        plot_flag, reload_flag, trainable, method, epochs = parse_config(config, args)
    env = Environment(args.seed)

    stocktrader = StockTrader()
    path = "result/{}/{}/".format(framework, args.num)
    logger.info('Mode: {}'.format(args.mode))

    if args.mode == 'train':
        if not os.path.exists(path):
            os.makedirs(path)
            train_start_date, train_end_date, test_start_date, test_end_date, codes = env.get_repo(
                start_date, end_date, codes, market)
            logger.debug("Training with codes: {}".format(codes))
            env.get_data(train_start_date, train_end_date, features,
                         window_length, market, codes)
            with open(path + 'config.json', 'w') as f:
                print(train_start_date)
                print(train_end_date)
                print(test_start_date)
                print(test_end_date)
                json.dump(
                    {
                        "train_start_date":
                        train_start_date.strftime('%Y-%m-%d'),
                        "train_end_date": train_end_date.strftime('%Y-%m-%d'),
                        "test_start_date":
                        test_start_date.strftime('%Y-%m-%d'),
                        "test_end_date": test_end_date.strftime('%Y-%m-%d'),
                        "codes": codes
                    }, f)
        else:
            with open('result/{}/{}/config.json'.format(framework, args.num),
                      'r') as f:
                dict_data = json.load(f)
            train_start_date, train_end_date, codes = datetime.strptime(
                dict_data['train_start_date'],
                '%Y-%m-%d'), datetime.strptime(dict_data['train_end_date'],
                                               '%Y-%m-%d'), dict_data['codes']
            env.get_data(train_start_date, train_end_date, features,
                         window_length, market, codes)

        if framework == 'PG':
            logger.debug("Loading PG Agent")
            agent = PG(
                len(codes) + 1, int(window_length), len(features),
                '-'.join(agent_config), reload_flag, trainable, args.num)
        elif framework == 'DDPG':
            logger.debug("Loading DDPG Agent")
            agent = DDPG(
                len(codes) + 1, int(window_length), len(features),
                '-'.join(agent_config), reload_flag, trainable, args.num)

        logger.info("Training: %d epochs", epochs)
        for epoch in range(epochs):
            traversal(stocktrader, agent, env, epoch, True, framework, method,
                      trainable)

            if record_flag:
                stocktrader.write(epoch, framework)

            if plot_flag:
                stocktrader.plot_result()

            agent.reset_buffer()
            stocktrader.print_result(epoch, agent, True)
            stocktrader.reset()
        agent.close()

    elif args.mode == 'test':

        with open("result/{}/{}/config.json".format(framework, args.num),
                  'r') as f:
            dict_data = json.load(f)
        test_start_date, test_end_date, codes = datetime.strptime(
            dict_data['test_start_date'],
            '%Y-%m-%d'), datetime.strptime(dict_data['test_end_date'],
                                           '%Y-%m-%d'), dict_data['codes']
        env.get_data(test_start_date, test_end_date, features, window_length,
                     market, codes)
        if framework == 'PG':
            logger.info("Loading PG Agent")
            agent = PG(
                len(codes) + 1, int(window_length), len(features),
                '-'.join(agent_config), True, False, args.num)
        elif framework == 'DDPG':
            logger.info("Loading DDPG Agent")
            agent = DDPG(
                len(codes) + 1, int(window_length), len(features),
                '-'.join(agent_config), True, False, args.num)
        backtest([agent], env, "result/{}/{}/".format(framework, args.num),
                 framework)
Example 15
                        processor=processor)
    else:
        assert not opt.recurrent
        # Setup random process for exploration
        random_process = [
            GaussianWhiteNoiseProcess(sigma=0.0, mu=1.0),
            GaussianWhiteNoiseProcess(sigma=1.0, mu=0.0)
        ]
        # Setup DDPG agent model
        actor, critic, action_input = DDPG_Model(
            window_length=opt.ddpg_window_length,
            num_actions=env.available_actions)
        # Setup DDPG agent
        agent = DDPG(actor=actor,
                     critic=critic,
                     critic_action_input=action_input,
                     num_actions=env.available_actions,
                     processor=processor,
                     random_process=random_process)

    print(mission_name + ' initialized.')

    # Setup weights path
    path = os.path.join('weights', 'Malmo', '{}'.format(mission_name))
    if not os.path.exists(path):
        os.makedirs(path)
    weights_path = os.path.join(path, '{}.hdf5'.format(name))

    # Run the agent
    agent.fit(env=env,
              num_steps=args.steps,
              weights_path=weights_path,
Example 16
else:
    writer = None

env = gym.make(args.env_name)
action_dim = env.action_space.shape[0]
state_dim = env.observation_space.shape[0]
state_rms = RunningMeanStd(state_dim)

if args.algo == 'ppo':
    agent = PPO(writer, device, state_dim, action_dim, agent_args)
elif args.algo == 'sac':
    agent = SAC(writer, device, state_dim, action_dim, agent_args)
elif args.algo == 'ddpg':
    from utils.noise import OUNoise
    noise = OUNoise(action_dim, 0)
    agent = DDPG(writer, device, state_dim, action_dim, agent_args, noise)

if (torch.cuda.is_available()) and (args.use_cuda):
    agent = agent.cuda()

if args.load != 'no':
    agent.load_state_dict(torch.load("./model_weights/" + args.load))

score_lst = []
state_lst = []

if agent_args.on_policy:
    score = 0.0
    state_ = env.reset()
    state = np.clip((state_ - state_rms.mean) / (state_rms.var**0.5 + 1e-8),
                    -5, 5)
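Example 16 normalizes observations with a RunningMeanStd tracker and clips the result to [-5, 5]. The class itself is not shown; below is a minimal sketch of such a tracker using the standard parallel mean/variance update (the actual utils.RunningMeanStd in that repository may differ).

import numpy as np

class RunningMeanStd:
    """Track a running mean and variance over observation batches."""
    def __init__(self, shape):
        self.mean = np.zeros(shape, dtype=np.float64)
        self.var = np.ones(shape, dtype=np.float64)
        self.count = 1e-4

    def update(self, batch):
        batch = np.atleast_2d(batch)
        b_mean, b_var, b_count = batch.mean(axis=0), batch.var(axis=0), batch.shape[0]
        delta = b_mean - self.mean
        total = self.count + b_count
        new_mean = self.mean + delta * b_count / total
        m2 = self.var * self.count + b_var * b_count + delta ** 2 * self.count * b_count / total
        self.mean, self.var, self.count = new_mean, m2 / total, total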