def __init__(self, env_name='Hopper-v2', total_episodes=1000, learning_steps=1000,
             update_time=1, episode_length=1000, total_steps=int(1e6), lr=1e-3,
             action_bound=1, num_samples=10, noise=0.02, best=2, std_dev=0.03,
             batch_size=100, elite_percentage=0.2, mutate=0.9, crossover=0.2,
             hidden_size=64, seed=1, namescope='default'):
    self.env = gym.make(env_name)
    # Seed numpy, the environment, and TensorFlow for reproducibility.
    np.random.seed(seed)
    self.env.seed(seed)
    tf.set_random_seed(seed)
    self.input_size = self.env.observation_space.shape[0]
    self.output_size = self.env.action_space.shape[0]
    self.total_episodes = total_episodes
    self.episode_length = episode_length
    self.total_steps = total_steps
    self.update_time = update_time
    self.lr = lr
    self.action_bound = action_bound
    self.num_samples = num_samples
    self.noise = noise
    self.stddev = std_dev
    self.batch_size = batch_size
    self.elite_percentage = elite_percentage
    self.mutate = mutate
    self.crossover = crossover
    self.hidden_size = hidden_size
    self.normalizer = utils.Normalizer(self.input_size)
    self.namescope = namescope
    self.gamma = 1
    self.best = best
    # config = tf.ConfigProto(device_count={'GPU': gpu})
    self.learning_steps = learning_steps
    self.td3_agent = td3_network.TD3(self.input_size, self.output_size, 1,
                                     namescope=self.namescope,
                                     hidden_size=hidden_size)
def __init__(self, env_name='Hopper-v2', total_episodes=1000, action_bound=1,
             episode_length=1000, learning_rate=0.02, weight=0.01, learning_steps=100,
             num_samples=8, noise=0.02, bc_index=None, std_dev=0.03, syn_step=10,
             meta_population_size=5, seed=1, hidden_size=300):
    self.env = gym.make(env_name)
    # Seed numpy and the environment for reproducibility.
    np.random.seed(seed)
    self.env.seed(seed)
    self.action_bound = action_bound
    self.input_size = self.env.observation_space.shape[0]
    self.output_size = self.env.action_space.shape[0]
    self.total_episodes = total_episodes
    self.episode_length = episode_length
    self.lr = learning_rate
    self.num_samples = num_samples
    self.noise = noise
    self.meta_population_size = meta_population_size
    self.seed = seed
    self.syn_step = syn_step
    self.learning_steps = learning_steps
    # Avoid a shared mutable default for bc_index.
    self.bc_index = bc_index if bc_index is not None else []
    self.weight = weight
    self.normalizer = utils.Normalizer(self.env.observation_space.shape[0])
    self.hidden_size = hidden_size
    self.stddev = std_dev
    self.td3_agent = td3_network.TD3(self.input_size, self.output_size, 1,
                                     hidden_size=self.hidden_size)
    self.num_best_deltas = 4
def __init__(self, env_name='Hopper-v2', total_episodes=1000, action_bound=1,
             episode_length=1000, learning_rate=0.02, weight=0.01, learning_steps=100,
             num_samples=8, noise=0.02, bc_index=None, std_dev=0.03, syn_step=10,
             num_best=4, meta_population_size=5, seed=1, hidden_size=300, coefficient=1):
    self.env = gym.make(env_name)
    np.random.seed(seed)
    self.env.seed(seed)
    self.action_bound = action_bound
    self.input_size = self.env.observation_space.shape[0]
    self.output_size = self.env.action_space.shape[0]
    self.total_episodes = total_episodes
    self.episode_length = episode_length
    self.lr = learning_rate
    self.num_best = num_best
    self.num_samples = num_samples
    self.noise = noise
    self.meta_population_size = meta_population_size
    self.seed = seed
    self.syn_step = syn_step
    self.coefficient = coefficient
    self.learning_steps = learning_steps
    # Avoid a shared mutable default for bc_index.
    self.bc_index = bc_index if bc_index is not None else []
    self.weight = weight
    self.normalizer = utils.Normalizer(self.env.observation_space.shape[0])
    self.hidden_size = hidden_size
    self.stddev = std_dev
    self.intrinsic_network = IntrinsicNetwork(state_dim=self.input_size,
                                              action_dim=self.output_size,
                                              seed=self.seed,
                                              namescope=str(seed),
                                              weight=self.weight)
    self.replay = utils.ReplayBuffer()
def __init__(self, env_name='Hopper-v2', total_episodes=1000, namescope='default',
             episode_length=1000, total_steps=int(1e6), lr=0.01, action_bound=1,
             num_samples=10, noise=0.02, std_dev=0.03, batch_size=64,
             elite_percentage=0.2, mutate=0.9, crossover=0.2, hidden_size=64, seed=1):
    self.env = gym.make(env_name)
    np.random.seed(seed)
    self.env.seed(seed)
    tf.set_random_seed(seed)
    self.input_size = self.env.observation_space.shape[0]
    self.output_size = self.env.action_space.shape[0]
    self.total_episodes = total_episodes
    self.episode_length = episode_length
    self.total_steps = total_steps
    self.lr = lr
    self.action_bound = action_bound
    self.num_samples = num_samples
    self.noise = noise
    self.stddev = std_dev
    self.batch_size = batch_size
    self.elite_percentage = elite_percentage
    self.mutate = mutate
    self.gamma = 1
    self.crossover = crossover
    self.hidden_size = hidden_size
    self.normalizer = utils.Normalizer(self.input_size)
    self.namescope = namescope
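# The constructors above all build `utils.Normalizer(self.input_size)` for online
# observation normalization. The actual implementation in utils.py is not shown here;
# the following is a minimal sketch of a running mean/std normalizer of the kind
# typically used in ARS-style agents. The `observe`/`normalize` method names are an
# assumption for illustration, not taken from the source.
import numpy as np


class Normalizer:
    def __init__(self, num_inputs):
        # Running statistics, one entry per observation dimension.
        self.n = np.zeros(num_inputs)
        self.mean = np.zeros(num_inputs)
        self.mean_diff = np.zeros(num_inputs)
        self.var = np.zeros(num_inputs)

    def observe(self, x):
        # Welford-style online update of the running mean and variance.
        self.n += 1.0
        last_mean = self.mean.copy()
        self.mean += (x - self.mean) / self.n
        self.mean_diff += (x - last_mean) * (x - self.mean)
        self.var = (self.mean_diff / self.n).clip(min=1e-2)

    def normalize(self, inputs):
        # Scale observations to roughly zero mean and unit variance.
        return (inputs - self.mean) / np.sqrt(self.var)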
import trading_vix_env
import numpy as np
import config as C
import utils
import train
import gym
import policy

if __name__ == '__main__':
    np.random.seed(0)
    env = trading_vix_env.trading_vix_env()
    # env = gym.make('Pendulum-v0')
    env.seed(0)
    # env = wrappers.Monitor(env, monitor_dir, force=True)
    # policy_inputs_size = env.observation_space.shape[0]
    policy_outputs_size = env.action_space.shape[0]
    policy = policy.Policy(C.extracted_feature_size, policy_outputs_size)
    normalizer = utils.Normalizer(C.extracted_feature_size)
    train.train(env, policy, normalizer)
target_orig_heights = torch.stack(target_orig_heights)
target_orig_widths = torch.stack(target_orig_widths)
target_orig_sizes = torch.stack((target_orig_heights,
                                 target_orig_widths)).transpose(0, 1)
origsize = (dictionaries[0]['orig_height'].item(),
            dictionaries[0]['orig_width'].item())

# Tensor -> float & numpy
if testset.there_is_gt:
    target_count = target_count.item()
    target_locations = \
        target_locations[0].to(device_cpu).numpy().reshape(-1, 2)
    target_orig_size = \
        target_orig_sizes[0].to(device_cpu).numpy().reshape(2)

normalzr = utils.Normalizer(args.height, args.width)

# Feed forward
with torch.no_grad():
    est_maps, est_count = model.forward(imgs)

# Convert to original size
est_map_np = est_maps[0, :, :].to(device_cpu).numpy()
est_map_np_origsize = \
    skimage.transform.resize(est_map_np,
                             output_shape=origsize,
                             mode='constant')
orig_img_np = imgs[0].to(device_cpu).squeeze().numpy()
orig_img_np_origsize = ((skimage.transform.resize(orig_img_np.transpose((1, 2, 0)),
                                                  output_shape=origsize,
                                                  mode='constant') + 1) / 2.0 * 255.0).\