def setup_experiment(conf,
                     name,
                     extype='dqn',
                     use_portfolio=False,
                     experiment_name=None,
                     seed=1):
    """Build the discretized CollectionsEnv and the requested DQN-style agent."""
    if use_portfolio:
        n_acc = 50
        portfolio = generate_portfolio(n_acc, seed=seed)
    else:
        portfolio = None

    params = Parameters()
    params.rho = 0.15

    actions_bins = np.array([0., 0.2, 0.5, 0.7, 1.0, 1.5])
    n_actions = len(actions_bins)

    # rep_dist = BetaRepayment(params, 0.9, 0.5, 10, MAX_ACCOUNT_BALANCE)
    rep_dist = UniformRepayment(params)

    c_env = CollectionsEnv(params=params,
                           repayment_dist=rep_dist,
                           reward_shaping='continuous',
                           randomize_start=True,
                           max_lambda=None,
                           starting_state=np.array([3, 200], dtype=np.float32))
    environment = DiscretizedActionWrapper(c_env, actions_bins)

    if extype == 'dqn':
        dqn = DQNAgent(environment,
                       name,
                       training=True,
                       config=conf,
                       initialize=False,
                       portfolio=portfolio,
                       experiment_name=experiment_name)
    elif extype == 'bspline':
        dqn = DQNAgentPoly(environment,
                           name,
                           training=True,
                           config=conf,
                           portfolio=portfolio,
                           experiment_name=experiment_name)
    elif extype == 'dqnpenal':
        dqn = DQNAgentPenalized(environment,
                                name,
                                config=conf,
                                training=True,
                                portfolio=portfolio,
                                experiment_name=experiment_name)
    else:
        raise ValueError(f'Unsupported experiment type: {extype!r}')

    return dqn
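
# Minimal usage sketch (the experiment name is illustrative; DefaultConfig is the
# config class used in the later examples, and run() is assumed to be the same
# training entry point called on the agent in Example #3):
if __name__ == '__main__':
    agent = setup_experiment(DefaultConfig(),
                             'dqn_experiment',
                             extype='dqn',
                             use_portfolio=True,
                             seed=1)
    agent.run()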
Example #2
    @classmethod
    def load(cls, model_path, load_buffer=False):
        """Restore a trained agent (config, environment and networks) from disk."""
        loaded_config = TrainConfigBase.load(
            os.path.join(model_path, 'train_config.pkl'))
        loaded_env = CollectionsEnv.load(os.path.join(model_path, 'env.pkl'))
        loaded_instance = DQNAgent(loaded_env,
                                   model_path,
                                   config=loaded_config,
                                   initialize=False,
                                   training=False)
        loaded_instance.main_net = tf.keras.models.load_model(
            os.path.join(model_path, 'main_net.h5'))
        loaded_instance.main_net.compile()
        loaded_instance.target_net = loaded_instance.main_net
        if load_buffer:
            buffer_path = os.path.join(model_path, 'buffer.pkl')
            try:
                with open(buffer_path, 'rb') as f:
                    loaded_instance.memory.buffer = pickle.load(f)
            except (FileNotFoundError, IOError):
                print(f'No replay buffer found at {buffer_path}.')
        return loaded_instance
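
# Minimal usage sketch (the model directory is hypothetical; it assumes the
# classmethod above is attached to DQNAgent, that the agent exposes the wrapped
# environment as .env, and that main_net maps a state to per-action Q-values):
if __name__ == '__main__':
    import numpy as np

    agent = DQNAgent.load('runs/my_dqn_agent', load_buffer=False)
    state = agent.env.reset()
    q_values = agent.main_net.predict_on_batch(np.asarray(state)[None, :])
    print('Greedy action index:', int(np.argmax(q_values)))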
Example #3
    def variable_summaries(self, var):
        # Reconstructed wrapper (name and enclosing scope are assumed); only the
        # two summary ops at the end were present in the original fragment.
        with tf.name_scope('summaries'):
            mean = tf.reduce_mean(var)
            stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))
            tf.summary.scalar('stddev', stddev)
            tf.summary.histogram('histogram', var)

    def save(self):
        # current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
        path_model = os.path.join(self.model_dir, 'main_net.h5')
        path_actions = os.path.join(self.model_dir, 'action_bins.npy')
        path_memory_buffer = os.path.join(self.model_dir, 'buffer.pkl')

        self.main_net.save(path_model)
        self.memory.save(path_memory_buffer)
        np.save(path_actions, self.env.action_bins)

    def load(self, model_path):
        # Restore network weights and the action discretization written by save().
        # self.main_net = tf.keras.models.load_model(os.path.join(model_path, 'main_net.h5'))
        # self.target_net = tf.keras.models.load_model(os.path.join(model_path, 'main_net.h5'))
        self.main_net.load_weights(os.path.join(model_path, 'main_net.h5'))
        self.target_net.load_weights(os.path.join(model_path, 'main_net.h5'))
        self.action_bins = np.load(os.path.join(model_path, 'action_bins.npy'))

if __name__ == '__main__':
    actions_bins = np.array([0, 1.0])
    n_actions = len(actions_bins)
    c_env = CollectionsEnv(reward_shaping='continuous', randomize_start=True, max_lambda=None)
    environment = DiscretizedActionWrapper(c_env, actions_bins)
    # environment = StateNormalization(environment)

    dqn = DQNAgentLattice(environment, 'DDQNLattice', training=True, config=DefaultConfig(), initialize=False)
    dqn.run()
Example #4
from learning.collections_env import CollectionsEnv
from learning.utils.wrappers import StateNormalization, DiscretizedActionWrapper

import numpy as np
from copy import deepcopy

action_bins = np.array([0, 1.0])

environment = CollectionsEnv(reward_shaping=False)
environment = DiscretizedActionWrapper(environment, action_bins)
# environment = StateNormalization(environment)

print(f"Reseting: {environment.reset()}")
print(f"Step: {environment.step(0)}")
print(f"Reseting: {environment.reset()}")

environment.env.starting_state = 2  #np.array([10, 100])
print(f"Setting start: {environment.env.starting_state}")
print(f"Reseting: {environment.reset()}")

#
# class ImuT:
#     def __init__(self, a):
#         self.a = np.array([0])
#         self.starting_a = self.a.copy()
#
#     def reset(self):
#         self.a = self.starting_a.copy()
#         return self.a

Example #5
        w = np.linspace(0, 200, w_points)
        ww, ll = np.meshgrid(w, l)
        z = np.zeros_like(ww)
        p = np.zeros_like(ww)
        # Greedy action index of the model at each point of the (w, l) grid.
        for i, xp in enumerate(w):
            for j, yp in enumerate(l):
                fixed_obs = np.array([ls[j], ws[i]])
                z[j, i] = np.argmax(model.predict_on_batch(fixed_obs[None, :]).numpy().flatten())

        fig, ax = plt.subplots(nrows=1, ncols=2)
        im = ax[0].pcolor(ww, ll, p)
        cdict = {
            'red': ((0.0, 0.25, .25), (0.02, .59, .59), (1., 1., 1.)),
            'green': ((0.0, 0.0, 0.0), (0.02, .45, .45), (1., .97, .97)),
            'blue': ((0.0, 1.0, 1.0), (0.02, .75, .75), (1., 0.45, 0.45))
        }

        cm = m.colors.LinearSegmentedColormap('my_colormap', cdict, 1024)
        im = ax[0].pcolor(ww, ll, z, cmap=cm)
        fig.colorbar(im)
        fig.show()

    return model


if __name__ == '__main__':
    from learning.collections_env import CollectionsEnv

    environ = CollectionsEnv()
    model = construct_lattice(environ)
Example #6
if __name__ == '__main__':
    from dcc import Parameters

    params = Parameters()
    params.rho = 0.15

    actions_bins = np.array([0, 0.2, 0.5, 1.0])
    n_actions = len(actions_bins)

    # rep_dist = BetaRepayment(params, 0.9, 0.5, 10, MAX_ACCOUNT_BALANCE)
    rep_dist = UniformRepayment(params)

    c_env = CollectionsEnv(params=params,
                           repayment_dist=rep_dist,
                           reward_shaping='continuous',
                           randomize_start=True,
                           max_lambda=None,
                           starting_state=np.array([3, 200], dtype=np.float32))
    environment = DiscretizedActionWrapper(c_env, actions_bins)

    portfolio_acc = generate_portfolio(50)

    experiment_name = None

    dqn = DQNAgent(environment,
                   'test_constr_log_every_pricer',
                   training=True,
                   config=DefaultConfig(),
                   initialize=False,
                   portfolio=portfolio_acc,
                   experiment_name=experiment_name)
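
    # Training would then presumably be started as in Example #3, assuming
    # DQNAgent exposes the same run() entry point:
    # dqn.run()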
Example #7
from collections import defaultdict
from learning.collections_env import DiscretizedObservationWrapper, DiscretizedActionWrapper
from learning.collections_env import CollectionsEnv
import numpy as np

Q = defaultdict(float)
gamma = 0.99  # discount factor
alpha = 0.5  # learning rate (step size of the Q-value update)

env = CollectionsEnv()

print(f'Action space: {env.action_space}')
print(f'State space: {env.observation_space}')

env = DiscretizedActionWrapper(env)
env = DiscretizedObservationWrapper(env)
actions = range(env.action_space.n)

print(f'Discretized action space: {env.action_space}')
print(f'Discretized state space: {env.observation_space}')


def update_Q(s, r, a, s_next, done):
    # One-step Q-learning update toward the TD target r + gamma * max_a' Q(s', a').
    max_q_next = max(Q[s_next, a_next] for a_next in actions)
    # Do not include the next state's value if currently at the terminal state.
    Q[s, a] += alpha * (r + gamma * max_q_next * (1.0 - done) - Q[s, a])
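
# Minimal sketch of how update_Q and the epsilon-greedy policy below could be
# wired into an episode loop (n_episodes is a hypothetical argument; env.step is
# assumed to follow the classic gym API of returning (obs, reward, done, info)):
def run_q_learning(n_episodes):
    for _ in range(n_episodes):
        ob = env.reset()
        done = False
        while not done:
            a = act(ob)
            ob_next, r, done, _ = env.step(a)
            update_Q(ob, r, a, ob_next, done)
            ob = ob_next
    return Q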


def act(ob):
    if np.random.random() < epsilon:
        # action_space.sample() is a convenient function to get a random action