Example #1
 def __init__(self, env_name='Hopper-v2', total_episodes=1000, learning_steps=1000, update_time=1,
              episode_length=1000, total_steps=int(1e6), lr=1e-3, action_bound=1, num_samples=10, noise=0.02, best=2,
              std_dev=0.03, batch_size=100, elite_percentage=0.2, mutate=0.9, crossover=0.2, hidden_size=64, seed=1,
              namescope='default'):
     self.env = gym.make(env_name)
     # Seed NumPy, the environment, and TensorFlow for reproducibility.
     np.random.seed(seed)
     self.env.seed(seed)
     tf.set_random_seed(seed)
     # Network dimensions come from the environment's observation/action spaces.
     self.input_size = self.env.observation_space.shape[0]
     self.output_size = self.env.action_space.shape[0]
     self.total_episodes = total_episodes
     self.episode_length = episode_length
     self.total_steps = total_steps
     self.update_time = update_time
     self.lr = lr
     self.action_bound = action_bound
     self.num_samples = num_samples
     self.noise = noise
     self.stddev = std_dev
     self.batch_size = batch_size
     self.elite_percentage = elite_percentage
     self.mutate = mutate
     self.crossover = crossover
     self.hidden_size = hidden_size
     self.normalizer = utils.Normalizer(self.input_size)
     self.namescope = namescope
     self.gamma = 1
     self.best = best
     # config = tf.ConfigProto(device_count={'GPU': gpu})
     self.learning_steps = learning_steps
     self.td3_agent = td3_network.TD3(self.input_size, self.output_size, 1, namescope=self.namescope,
                                      hidden_size=hidden_size)
Example #2
 def __init__(self, env_name='Hopper-v2', total_episodes=1000, action_bound=1,
              episode_length=1000, learning_rate=0.02, weight=0.01, learning_steps=100,
             num_samples=8, noise=0.02, bc_index=None, std_dev=0.03, syn_step=10,
              meta_population_size=5, seed=1, hidden_size=300):
     self.env = gym.make(env_name)
     np.random.seed(seed)
     self.env.seed(seed)
     self.action_bound = action_bound
     self.input_size = self.env.observation_space.shape[0]
     self.output_size = self.env.action_space.shape[0]
     self.total_episodes = total_episodes
     self.episode_length = episode_length
     self.lr = learning_rate
     self.num_samples = num_samples
     self.noise = noise
     self.meta_population_size = meta_population_size
     self.seed = seed
     self.syn_step = syn_step
     self.learning_steps = learning_steps
     self.bc_index = bc_index if bc_index is not None else []  # avoid a shared mutable default
     self.weight = weight
     self.normalizer = utils.Normalizer(self.input_size)
     self.hidden_size = hidden_size
     self.stddev = std_dev
     self.td3_agent = td3_network.TD3(self.input_size, self.output_size, 1, hidden_size=self.hidden_size)
     self.num_best_deltas = 4
Example #3
 def __init__(self,
              env_name='Hopper-v2',
              total_episodes=1000,
              action_bound=1,
              episode_length=1000,
              learning_rate=0.02,
              weight=0.01,
              learning_steps=100,
              num_samples=8,
              noise=0.02,
             bc_index=None,
              std_dev=0.03,
              syn_step=10,
              num_best=4,
              meta_population_size=5,
              seed=1,
              hidden_size=300,
              coefficient=1):
     self.env = gym.make(env_name)
     np.random.seed(seed)
     self.env.seed(seed)
     self.action_bound = action_bound
     self.input_size = self.env.observation_space.shape[0]
     self.output_size = self.env.action_space.shape[0]
     self.total_episodes = total_episodes
     self.episode_length = episode_length
     self.lr = learning_rate
     self.num_best = num_best
     self.num_samples = num_samples
     self.noise = noise
     self.meta_population_size = meta_population_size
     self.seed = seed
     self.syn_step = syn_step
     self.coefficient = coefficient
     self.learning_steps = learning_steps
     self.bc_index = bc_index if bc_index is not None else []  # avoid a shared mutable default
     self.weight = weight
     self.normalizer = utils.Normalizer(self.input_size)
     self.hidden_size = hidden_size
     self.stddev = std_dev
     self.intrinsic_network = IntrinsicNetwork(state_dim=self.input_size,
                                               action_dim=self.output_size,
                                               seed=self.seed,
                                               namescope=str(seed),
                                               weight=self.weight)
     self.replay = utils.ReplayBuffer()
Example #4
 def __init__(self,
              env_name='Hopper-v2',
              total_episodes=1000,
              namescope='default',
              episode_length=1000,
              total_steps=int(1e6),
              lr=0.01,
              action_bound=1,
              num_samples=10,
              noise=0.02,
              std_dev=0.03,
              batch_size=64,
              elite_percentage=0.2,
              mutate=0.9,
              crossover=0.2,
              hidden_size=64,
              seed=1):
     self.env = gym.make(env_name)
     np.random.seed(seed)
     self.env.seed(seed)
     tf.set_random_seed(seed)
     self.input_size = self.env.observation_space.shape[0]
     self.output_size = self.env.action_space.shape[0]
     self.total_episodes = total_episodes
     self.episode_length = episode_length
     self.total_steps = total_steps
     self.lr = lr
     self.action_bound = action_bound
     self.num_samples = num_samples
     self.noise = noise
     self.stddev = std_dev
     self.batch_size = batch_size
     self.elite_percentage = elite_percentage
     self.mutate = mutate
     self.gamma = 1
     self.crossover = crossover
     self.hidden_size = hidden_size
     self.normalizer = utils.Normalizer(self.input_size)
     self.namescope = namescope
Example #5
import trading_vix_env
import numpy as np
import config as C
import utils
import train
import gym
import policy

if __name__ == '__main__':

    np.random.seed(0)
    env = trading_vix_env.trading_vix_env()
    #env = gym.make('Pendulum-v0')
    env.seed(0)
    #env = wrappers.Monitor(env, monitor_dir, force=True)
    #policy_inputs_size = env.observation_space.shape[0]
    policy_outputs_size = env.action_space.shape[0]
    policy_net = policy.Policy(C.extracted_feature_size, policy_outputs_size)  # avoid shadowing the policy module
    normalizer = utils.Normalizer(C.extracted_feature_size)
    train.train(env, policy_net, normalizer)
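Examples #1 through #5 all construct utils.Normalizer with a single argument (the observation dimension), but the class itself never appears in the snippets. For orientation, here is a minimal sketch of the kind of online state normalizer such agents typically use (Welford-style running mean and variance); the attribute layout and the observe/normalize method names are assumptions, not the actual utils module:

import numpy as np

class Normalizer:
    # Sketch only: tracks a running mean/variance per observation
    # dimension and rescales states to roughly zero mean, unit variance.
    def __init__(self, num_inputs):
        self.n = np.zeros(num_inputs)          # per-dimension sample count
        self.mean = np.zeros(num_inputs)       # running mean
        self.mean_diff = np.zeros(num_inputs)  # running sum of squared deviations
        self.var = np.ones(num_inputs)         # start at 1 so normalize() is safe early

    def observe(self, x):
        # Welford's online update of mean and variance.
        self.n += 1.0
        last_mean = self.mean.copy()
        self.mean += (x - self.mean) / self.n
        self.mean_diff += (x - last_mean) * (x - self.mean)
        self.var = (self.mean_diff / self.n).clip(min=1e-2)

    def normalize(self, inputs):
        return (inputs - self.mean) / np.sqrt(self.var)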
Example #6
    target_orig_heights = torch.stack(target_orig_heights)
    target_orig_widths = torch.stack(target_orig_widths)
    target_orig_sizes = torch.stack((target_orig_heights,
                                     target_orig_widths)).transpose(0, 1)
    origsize = (dictionaries[0]['orig_height'].item(),
                dictionaries[0]['orig_width'].item())

    # Tensor -> float & numpy
    if testset.there_is_gt:
        target_count = target_count.item()
        target_locations = \
            target_locations[0].to(device_cpu).numpy().reshape(-1, 2)
    target_orig_size = \
        target_orig_sizes[0].to(device_cpu).numpy().reshape(2)

    normalizer = utils.Normalizer(args.height, args.width)

    # Feed forward
    with torch.no_grad():
        est_maps, est_count = model(imgs)  # call the module directly so any hooks run

    # Convert to original size
    est_map_np = est_maps[0, :, :].to(device_cpu).numpy()
    est_map_np_origsize = \
        skimage.transform.resize(est_map_np,
                                 output_shape=origsize,
                                 mode='constant')
    orig_img_np = imgs[0].to(device_cpu).squeeze().numpy()
    orig_img_np_origsize = ((skimage.transform.resize(orig_img_np.transpose((1, 2, 0)),
                                                      output_shape=origsize,
                                                      mode='constant') + 1) / 2.0 * 255.0).\
        astype(np.float32)  # the snippet is truncated in the source; this final cast is assumed