def __init__(self, bot_name='LSTMNet', input_dim=4, output_dim=2):
    self.bot_name = bot_name
    self.input_dim = input_dim
    self.replay_buffer = replay_buffer.ReplayBufferSingleAgent()
    self.action_network = BaselineLSTMNet(input_dim=input_dim, output_dim=output_dim, is_value=False)
    self.value_network = BaselineLSTMNet(input_dim=input_dim, output_dim=output_dim, is_value=True)
    self.ppo = ppo_update.PPO([self.action_network, self.value_network], two_nets=True)
    self.actor_opt = torch.optim.RMSprop(self.action_network.parameters())
    self.value_opt = torch.optim.RMSprop(self.value_network.parameters())
    self.last_state = [0, 0, 0, 0]
    self.last_action = 0
    self.last_action_probs = torch.Tensor([0])
    self.last_value_pred = torch.Tensor([[0, 0]])
    self.action_hidden_state = None
    self.full_probs = None
    self.value_hidden_state = None
    self.new_action_hidden = None
    self.new_value_hidden = None
    self.reward_history = []
    self.num_steps = 0
def __init__(self, bot_name='RandomProLoNet', input_dim=4, output_dim=2):
    self.replay_buffer = replay_buffer.ReplayBufferSingleAgent()
    # self.replay_buffer = ReplayMemory(1000)
    self.bot_name = bot_name
    if input_dim == 4 and output_dim == 2:  # CartPole
        self.action_network, self.value_network = init_random_cart_net()
    elif input_dim == 8 and output_dim == 4:  # Lunar Lander
        self.action_network, self.value_network = init_random_lander_net()
    elif input_dim == 194 and output_dim == 44:  # SC Macro
        self.action_network, self.value_network = init_random_sc_net()
    elif input_dim == 32 and output_dim == 10:  # SC Micro
        self.action_network, self.value_network = init_random_micro_net()
    self.ppo = ppo_update.PPO([self.action_network, self.value_network], two_nets=True)
    self.actor_opt = torch.optim.RMSprop(self.action_network.parameters())
    self.value_opt = torch.optim.RMSprop(self.value_network.parameters())
    self.last_state = [0, 0, 0, 0]
    self.last_action = 0
    self.last_action_probs = torch.Tensor([0])
    self.last_value_pred = torch.Tensor([[0, 0]])
    self.full_probs = None
    self.reward_history = []
    self.num_steps = 0
def __init__(self, distribution='one_hot', bot_name='ProLoNet', input_dim=4, output_dim=2,
             deepen_method='random', deepen_criteria='entropy'):
    self.replay_buffer = replay_buffer.ReplayBufferSingleAgent()
    self.bot_name = bot_name
    if input_dim == 4 and output_dim == 2:  # CartPole
        self.action_network, self.value_network = init_cart_nets(distribution)
    elif input_dim == 8 and output_dim == 4:  # Lunar Lander
        self.action_network, self.value_network = init_lander_nets(distribution)
    elif input_dim == 194 and output_dim == 44:  # SC Macro
        self.action_network, self.value_network = init_sc_nets(distribution)
    elif input_dim == 32 and output_dim == 10:  # SC Micro
        self.action_network, self.value_network = init_micro_net(distribution)
    self.deepen_method = deepen_method
    self.deeper_action_network = add_level(self.action_network, method=deepen_method)
    self.deeper_value_network = add_level(self.value_network, method=deepen_method)
    self.ppo = ppo_update.PPO([self.action_network, self.value_network], two_nets=True)
    self.actor_opt = torch.optim.RMSprop(self.action_network.parameters())
    self.value_opt = torch.optim.RMSprop(self.value_network.parameters())
    self.deeper_actor_opt = torch.optim.RMSprop(self.deeper_action_network.parameters())
    self.deeper_value_opt = torch.optim.RMSprop(self.deeper_value_network.parameters())
    self.last_state = [0, 0, 0, 0]
    self.last_action = 0
    self.last_action_probs = torch.Tensor([0])
    self.last_value_pred = torch.Tensor([[0, 0]])
    self.last_deep_action_probs = torch.Tensor([0])
    self.last_deep_value_pred = torch.Tensor([[0, 0]])
    self.full_probs = None
    self.deeper_full_probs = None
    self.reward_history = []
    self.num_steps = 0
    self.deepen_criteria = deepen_criteria
    self.deepen_threshold = 350
    self.times_deepened = 0
def __init__(self, bot_name='DDT', input_dim=4, output_dim=2, rule_list=False, num_rules=4):
    self.replay_buffer = replay_buffer.ReplayBufferSingleAgent()
    self.bot_name = bot_name
    self.rule_list = rule_list
    self.output_dim = output_dim
    self.input_dim = input_dim
    self.num_rules = num_rules
    if rule_list:
        self.bot_name += str(num_rules) + '_rules'
        init_weights, init_comparators, init_leaves = init_rule_list(num_rules, input_dim, output_dim)
    else:
        init_weights = None
        init_comparators = None
        init_leaves = num_rules
        self.bot_name += str(num_rules) + '_leaves'
    self.action_network = ProLoNet(input_dim=input_dim,
                                   output_dim=output_dim,
                                   weights=init_weights,
                                   comparators=init_comparators,
                                   leaves=init_leaves,
                                   alpha=1,
                                   is_value=False,
                                   use_gpu=False)
    self.value_network = ProLoNet(input_dim=input_dim,
                                  output_dim=output_dim,
                                  weights=init_weights,
                                  comparators=init_comparators,
                                  leaves=init_leaves,
                                  alpha=1,
                                  is_value=True,
                                  use_gpu=False)
    self.ppo = ppo_update.PPO([self.action_network, self.value_network], two_nets=True, use_gpu=False)
    self.last_state = [0, 0, 0, 0]
    self.last_action = 0
    self.last_action_probs = torch.Tensor([0])
    self.last_value_pred = torch.Tensor([[0, 0]])
    self.last_deep_action_probs = None
    self.last_deep_value_pred = [None] * output_dim
    self.full_probs = None
    self.deeper_full_probs = None
    self.reward_history = []
    self.num_steps = 0
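# Hypothetical usage sketch for the DDT constructor above. The class name "DDTAgent"
# is an assumption (only the constructor appears in this listing); the keyword
# arguments and naming behavior come directly from it. With rule_list=True the tree
# is seeded from init_rule_list and the bot name gains a '<num_rules>_rules' suffix;
# otherwise num_rules is treated as a leaf count and the suffix is '<num_rules>_leaves'.
list_agent = DDTAgent(bot_name='DDT', input_dim=4, output_dim=2, rule_list=True, num_rules=4)
tree_agent = DDTAgent(bot_name='DDT', input_dim=4, output_dim=2, rule_list=False, num_rules=8)
print(list_agent.bot_name)  # 'DDT4_rules'
print(tree_agent.bot_name)  # 'DDT8_leaves'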
def __init__(self, bot_name='DJINNAgent', input_dim=4, output_dim=2, drop_prob=0.0):
    self.bot_name = bot_name
    self.input_dim = input_dim
    self.output_dim = output_dim
    self.replay_buffer = replay_buffer.ReplayBufferSingleAgent()
    if input_dim == 4:
        self.env_name = 'cart'
    elif input_dim == 6:  # Fire Sim
        self.env_name = 'fire'
    elif input_dim == 8:
        self.env_name = 'lunar'
    elif input_dim == 37:
        self.env_name = 'sc_micro'
    elif input_dim > 100:
        self.env_name = 'sc_macro'
    tree_dict = DJINN_TREE_DATA[self.env_name]
    action_param_dict = tree_to_nn_weights(input_dim, output_dim, tree_dict)
    # value_param_dict = tree_to_nn_weights(input_dim, 1, tree_dict)
    self.action_network = PyDJINN(input_dim,
                                  weights=action_param_dict['weights'],
                                  biases=[],
                                  drop_prob=drop_prob,
                                  is_value=False)
    self.value_network = PyDJINN(input_dim,
                                 weights=action_param_dict['weights'],
                                 biases=[],
                                 drop_prob=drop_prob,
                                 is_value=True)
    self.ppo = ppo_update.PPO([self.action_network, self.value_network], two_nets=True)
    self.actor_opt = torch.optim.RMSprop(self.action_network.parameters(), lr=5e-3)
    self.value_opt = torch.optim.RMSprop(self.value_network.parameters(), lr=5e-3)
    self.last_state = [0, 0, 0, 0]
    self.last_action = 0
    self.last_action_probs = torch.Tensor([0])
    self.last_value_pred = torch.Tensor([[0, 0]])
    self.last_deep_action_probs = torch.Tensor([0])
    self.last_deep_value_pred = torch.Tensor([[0, 0]])
    self.full_probs = None
    self.reward_history = []
    self.num_steps = 0
def __init__(self, distribution='one_hot', bot_name='ShallowProLoNet', input_dim=4, output_dim=2,
             adversarial=False):
    self.replay_buffer = replay_buffer.ReplayBufferSingleAgent()
    self.bot_name = bot_name
    self.adv_prob = 0.1
    if input_dim == 4 and output_dim == 2:  # CartPole
        if adversarial:
            self.action_network, self.value_network = init_adversarial_net(adv_type='cart',
                                                                           distribution_in=distribution,
                                                                           adv_prob=self.adv_prob)
            self.bot_name += '_adversarial' + str(self.adv_prob)
        else:
            self.action_network, self.value_network = init_cart_nets(distribution)
    elif input_dim == 8 and output_dim == 4:  # Lunar Lander
        if adversarial:
            self.action_network, self.value_network = init_adversarial_net(adv_type='lunar',
                                                                           distribution_in=distribution)
            self.bot_name += '_adversarial' + str(self.adv_prob)
        else:
            self.action_network, self.value_network = init_lander_nets(distribution)
    elif input_dim == 194 and output_dim == 44:  # SC Macro
        if adversarial:
            self.action_network, self.value_network = init_adversarial_net(adv_type='sc',
                                                                           distribution_in=distribution)
            self.bot_name += '_adversarial' + str(self.adv_prob)
        else:
            self.action_network, self.value_network = init_sc_nets(distribution)
    elif input_dim == 32 and output_dim == 10:  # SC Micro
        if adversarial:
            self.action_network, self.value_network = init_adversarial_net(adv_type='micro',
                                                                           distribution_in=distribution)
            self.bot_name += '_adversarial' + str(self.adv_prob)
        else:
            self.action_network, self.value_network = init_micro_net(distribution)
    self.ppo = ppo_update.PPO([self.action_network, self.value_network], two_nets=True)
    self.actor_opt = torch.optim.RMSprop(self.action_network.parameters())
    self.value_opt = torch.optim.RMSprop(self.value_network.parameters())
    self.last_state = [0, 0, 0, 0]
    self.last_action = 0
    self.last_action_probs = torch.Tensor([0])
    self.last_value_pred = torch.Tensor([[0, 0]])
    self.full_probs = None
    self.reward_history = []
    self.num_steps = 0
def __init__(self, bot_name='FCNet', input_dim=4, output_dim=2, sl_init=False):
    self.bot_name = bot_name
    self.sl_init = sl_init
    self.replay_buffer = replay_buffer.ReplayBufferSingleAgent()
    self.action_network = BaselineFCNet(input_dim=input_dim, output_dim=output_dim, is_value=False)
    self.value_network = BaselineFCNet(input_dim=input_dim, output_dim=output_dim, is_value=True)
    if self.sl_init:
        if input_dim == 4:
            self.teacher = CartPoleHeuristic()
            self.action_loss_threshold = 250
        elif input_dim == 8:
            self.teacher = LunarHeuristic()
            self.action_loss_threshold = 350
        elif input_dim == 32:
            self.teacher = StarCraftMicroHeuristic()
            self.action_loss_threshold = 500
        elif input_dim > 100:
            self.teacher = StarCraftMacroHeuristic()
            self.action_loss_threshold = 1000
        self.bot_name += '_SLtoRL_'
    self.ppo = ppo_update.PPO([self.action_network, self.value_network], two_nets=True)
    self.actor_opt = torch.optim.RMSprop(self.action_network.parameters())
    self.value_opt = torch.optim.RMSprop(self.value_network.parameters())
    self.last_state = [0, 0, 0, 0]
    self.last_action = 0
    self.last_action_probs = torch.Tensor([0])
    self.last_value_pred = torch.Tensor([[0, 0]])
    self.last_deep_action_probs = torch.Tensor([0])
    self.last_deep_value_pred = torch.Tensor([[0, 0]])
    self.full_probs = None
    self.reward_history = []
    self.num_steps = 0
def __init__(self, distribution='one_hot', bot_name='ProLoNet', input_dim=4, output_dim=2,
             use_gpu=False, vectorized=False, randomized=False, adversarial=False, deepen=True,
             epsilon=0.9, epsilon_decay=0.95, epsilon_min=0.05, deterministic=False):
    self.replay_buffer = replay_buffer.ReplayBufferSingleAgent()
    self.bot_name = bot_name
    self.use_gpu = use_gpu
    self.vectorized = vectorized
    self.randomized = randomized
    self.adversarial = adversarial
    self.deepen = deepen
    self.output_dim = output_dim
    self.input_dim = input_dim
    self.adv_prob = .05
    self.epsilon = epsilon
    self.epsilon_decay = epsilon_decay
    self.epsilon_min = epsilon_min
    self.deterministic = deterministic
    if vectorized:
        self.bot_name += '_vect'
    if randomized:
        self.bot_name += '_rand'
    if use_gpu:
        self.bot_name += '_gpu'
    if deepen:
        self.bot_name += '_deepening'
    if input_dim == 4 and output_dim == 2:  # CartPole
        self.action_network, self.value_network = init_cart_nets(distribution, use_gpu, vectorized, randomized)
        if adversarial:
            self.action_network, self.value_network = init_adversarial_net(adv_type='cart',
                                                                           distribution_in=distribution,
                                                                           adv_prob=self.adv_prob)
            self.bot_name += '_adversarial' + str(self.adv_prob)
    elif input_dim == 8 and output_dim == 4:  # Lunar Lander
        self.action_network, self.value_network = init_lander_nets(distribution, use_gpu, vectorized, randomized)
        if adversarial:
            self.action_network, self.value_network = init_adversarial_net(adv_type='lunar',
                                                                           distribution_in=distribution,
                                                                           adv_prob=self.adv_prob)
            self.bot_name += '_adversarial' + str(self.adv_prob)
    elif input_dim == 194 and output_dim == 44:  # SC Macro
        self.action_network, self.value_network = init_sc_nets(distribution, use_gpu, vectorized, randomized)
    elif input_dim == 37 and output_dim == 10:  # SC Micro
        self.action_network, self.value_network = init_micro_net(distribution, use_gpu, vectorized, randomized)
        if adversarial:
            self.action_network, self.value_network = init_adversarial_net(adv_type='micro',
                                                                           distribution_in=distribution,
                                                                           adv_prob=self.adv_prob)
            self.bot_name += '_adversarial' + str(self.adv_prob)
    elif input_dim == 6 and output_dim == 5:  # Fire Sim
        self.action_network, self.value_network = init_fire_nets(distribution, use_gpu, vectorized,
                                                                 randomized, bot_name.split('_')[0])
    self.ppo = ppo_update.PPO([self.action_network, self.value_network], two_nets=True, use_gpu=use_gpu)
    self.actor_opt = torch.optim.RMSprop(self.action_network.parameters(), lr=1e-5)
    self.value_opt = torch.optim.RMSprop(self.value_network.parameters(), lr=1e-5)
    if self.deepen:
        self.deeper_action_network = add_level(self.action_network, use_gpu=use_gpu)
        self.deeper_value_network = add_level(self.value_network, use_gpu=use_gpu)
        self.deeper_actor_opt = torch.optim.RMSprop(self.deeper_action_network.parameters())
        self.deeper_value_opt = torch.optim.RMSprop(self.deeper_value_network.parameters())
    else:
        self.deeper_value_network = None
        self.deeper_action_network = None
        self.deeper_actor_opt = None
        self.deeper_value_opt = None
    self.num_times_deepened = 0
    self.last_state = [0, 0, 0, 0]
    self.last_action = 0
    self.last_action_probs = torch.Tensor([0])
    self.last_value_pred = torch.Tensor([[0, 0]])
    self.last_deep_action_probs = None
    self.last_deep_value_pred = [None] * output_dim
    self.full_probs = None
    self.deeper_full_probs = None
    self.reward_history = []
    self.num_steps = 0
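# Minimal usage sketch for the constructor above. Assuming it belongs to the
# DeepProLoNet class (that name is taken from its use as a teacher in the FCNet
# agent below), the (input_dim, output_dim) pair selects the environment-specific
# initializer, so a CartPole agent needs only the defaults.
cart_agent = DeepProLoNet(distribution='one_hot',
                          input_dim=4,    # CartPole observation size
                          output_dim=2,   # CartPole action count
                          deepen=True)    # also build the one-level-deeper networks
lander_agent = DeepProLoNet(distribution='one_hot', input_dim=8, output_dim=4, deepen=False)
print(cart_agent.bot_name)    # 'ProLoNet_deepening'
print(lander_agent.bot_name)  # 'ProLoNet'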
def __init__(self, bot_name='FCNet', input_dim=4, output_dim=2, sl_init=False, num_hidden=1):
    self.bot_name = bot_name + str(num_hidden) + '_hid'
    self.sl_init = sl_init
    self.input_dim = input_dim
    self.output_dim = output_dim
    self.num_hidden = num_hidden
    self.replay_buffer = replay_buffer.ReplayBufferSingleAgent()
    self.action_network = BaselineFCNet(input_dim=input_dim, output_dim=output_dim,
                                        is_value=False, hidden_layers=num_hidden)
    self.value_network = BaselineFCNet(input_dim=input_dim, output_dim=output_dim,
                                       is_value=True, hidden_layers=num_hidden)
    if self.sl_init:
        if input_dim == 4:
            self.teacher = CartPoleHeuristic()
            self.action_loss_threshold = 25
        elif input_dim == 6:  # Fire Sim
            self.teacher = DeepProLoNet(distribution='one_hot',
                                        input_dim=input_dim,
                                        output_dim=output_dim,
                                        use_gpu=False,
                                        vectorized=False,
                                        randomized=False,
                                        adversarial=False,
                                        deepen=False,
                                        deterministic=True,
                                        )
            self.action_loss_threshold = 50
        elif input_dim == 12:  # Build Marines
            self.teacher = DeepProLoNet(distribution='one_hot',
                                        input_dim=input_dim,
                                        output_dim=output_dim,
                                        use_gpu=False,
                                        vectorized=False,
                                        randomized=False,
                                        adversarial=False,
                                        deepen=False,
                                        deterministic=True,
                                        )
            self.action_loss_threshold = 50
        elif input_dim == 8:
            self.teacher = LunarHeuristic()
            self.action_loss_threshold = 35
        elif input_dim == 28:
            self.teacher = DeepProLoNet(distribution='one_hot',
                                        input_dim=input_dim,
                                        output_dim=output_dim,
                                        use_gpu=False,
                                        vectorized=False,
                                        randomized=False,
                                        adversarial=False,
                                        deepen=False,
                                        deterministic=True,
                                        )
            self.teacher.load()
            self.action_loss_threshold = 50
        elif input_dim == 37:
            self.teacher = StarCraftMicroHeuristic()
            self.action_loss_threshold = 50
        elif input_dim > 100:
            self.teacher = StarCraftMacroHeuristic()
            self.action_loss_threshold = 1000
        self.bot_name += '_SLtoRL_'
    self.ppo = ppo_update.PPO([self.action_network, self.value_network], two_nets=True)
    self.actor_opt = torch.optim.RMSprop(self.action_network.parameters(), lr=5e-3)
    self.value_opt = torch.optim.RMSprop(self.value_network.parameters(), lr=5e-3)
    # self.ppo.actor_opt = self.actor_opt
    # self.ppo.critic_opt = self.value_opt
    self.last_state = [0, 0, 0, 0]
    self.last_action = 0
    self.last_action_probs = torch.Tensor([0])
    self.last_value_pred = torch.Tensor([[0, 0]])
    self.last_deep_action_probs = torch.Tensor([0])
    self.last_deep_value_pred = torch.Tensor([[0, 0]])
    self.full_probs = None
    self.reward_history = []
    self.num_steps = 0
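# Hypothetical usage sketch for the SL-to-RL variant above (the class name
# "FCNetAgent" is assumed; only the constructor appears in this listing). With
# sl_init=True the constructor selects a teacher policy by input dimension and an
# action_loss_threshold, presumably used elsewhere in the class to decide when to
# stop imitating the teacher and continue with plain PPO.
cart_agent = FCNetAgent(bot_name='FCNet', input_dim=4, output_dim=2, sl_init=True, num_hidden=2)
print(cart_agent.bot_name)                 # 'FCNet2_hid_SLtoRL_'
print(type(cart_agent.teacher).__name__)   # 'CartPoleHeuristic'
print(cart_agent.action_loss_threshold)    # 25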