def trainJump(save, save_as=None, curr_checkpoint=None):
    model.train()
    for episode in range(num_episodes):
        time.sleep(1.2)
        prev_score = getScore()
        if episode % 10 == 0:
            print("Score:", prev_score)
            print("-----------------------------------------")
            print("Episode:", episode)

        # Get state
        state = torch.Tensor(getLessProcessed()).unsqueeze(0)
        state_shape = state.shape
        state = state.view(state_shape[0], state_shape[3], state_shape[1], state_shape[2])

        # Construct distribution based on calculated mean and variance
        mean = model(state)
        variance = final_variance + (initial_variance - final_variance) * \
            math.exp(-1. * episode / variance_decay)
        if episode % 10 == 0:
            print("Mean:", float(mean), "Deviation:", float(variance))
        m = Normal(mean, variance)

        # Sample and perform action
        action = m.sample()
        if episode % 10 == 0:
            print("Action:", action)
        os.system("adb shell input swipe 500 500 500 500 " + str(int(action)))
        time.sleep(0.5)

        # Get reward and optimize model
        reward = getReward(prev_score)
        if reward >= 2:
            reward = 10
        elif reward == 1:
            reward = 0.3
        elif reward < 0:
            reward = -10
            onDeath()
        if episode % 1 == 0:
            print("Reward:", reward)
        loss = -m.log_prob(action) * reward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if episode % 10 == 0:
            print("-----------------------------------------")

        if save:
            if (episode + 1) % 1001 == 0:
                save_file = {
                    'state_dict': model.state_dict(),
                    'optimizer': optimizer.state_dict(),
                }
                file_name = save_as + str((episode // 1000) + curr_checkpoint) + ".pth"
                torch.save(save_file, file_name)
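
# The loop above assumes a few globals defined elsewhere in the project (model,
# optimizer, num_episodes, and the variance schedule constants). A minimal
# sketch of what those definitions might look like is below; the network
# architecture and hyperparameter values are illustrative assumptions, not the
# original ones.
import math
import os
import time

import torch
import torch.nn as nn
import torch.optim as optim
from torch.distributions import Normal

class PolicyNet(nn.Module):
    """Tiny CNN that maps a screenshot to a single press-duration mean (ms)."""
    def __init__(self):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=8, stride=4), nn.ReLU(),
            nn.Conv2d(16, 32, kernel_size=4, stride=2), nn.ReLU(),
            nn.AdaptiveAvgPool2d(1),
        )
        self.head = nn.Linear(32, 1)

    def forward(self, x):
        x = self.features(x)
        return self.head(x.flatten(1))

model = PolicyNet()
optimizer = optim.Adam(model.parameters(), lr=1e-4)

num_episodes = 5000
initial_variance = 200.0   # wide exploration early on
final_variance = 20.0      # narrower exploration later
variance_decay = 1000      # episodes over which the variance anneals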
def trainJump(save, save_as=None, curr_checkpoint=None):
    Agent.model.eval()
    for episode in range(num_episodes):
        time.sleep(1.2)
        prev_score = getScore()
        if episode % 10 == 0:
            print("Score:", prev_score)
            print("-----------------------------------------")
            print("Episode:", episode)

        # Get state
        state = torch.Tensor(getLessProcessed()).unsqueeze(0)
        state_shape = state.shape
        state = state.view(state_shape[0], state_shape[3], state_shape[1], state_shape[2])

        # Calculate mean and variance
        mean = Agent.a2c(state)
        variance = final_variance + (initial_variance - final_variance) * \
            math.exp(-1. * episode / variance_decay)

        # Construct normal distribution and sample action
        if episode % 10 == 0:
            print("Mean:", float(mean), "Deviation:", float(variance))
        m = Normal(mean, variance)
        action = m.sample()
        log_prob = m.log_prob(action)
        if episode % 10 == 0:
            print("Action:", action)
        os.system("adb shell input swipe 500 500 500 500 " + str(int(action)))

        # Get reward and push action, state, log_prob, and reward to memory
        time.sleep(0.5)
        reward = getReward(prev_score)
        if reward >= 2:
            reward = 10
        elif reward == 1:
            reward = 0.1
        elif reward < 0:
            reward = -10
            onDeath()
        Agent.add_mem(action, state, log_prob, reward)
        if episode % 10 == 0:
            print("Reward:", reward)
            print("-----------------------------------------")

        # Optimize model
        if (episode + 1) % (k + 1) == 0:
            Agent.optimize_model(k, variance)

        if save:
            if (episode + 1) % 1001 == 0:
                Agent.save_agent(save_as + str(curr_checkpoint + (episode // 1000)) + ".pth")
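
# This variant relies on an Agent object with add_mem, optimize_model, and
# save_agent methods (plus an a2c network and a k-step update interval defined
# elsewhere). The sketch below shows one way such an agent could be structured,
# inferred from how the methods are called above; the update rule, attribute
# names, and learning rate are assumptions, not the original implementation.
import torch
import torch.optim as optim

class JumpAgent:
    def __init__(self, actor_critic, lr=1e-4):
        self.a2c = actor_critic            # network producing the action mean
        self.model = actor_critic          # alias used by trainJump for eval()/train()
        self.optimizer = optim.Adam(actor_critic.parameters(), lr=lr)
        self.memory = []                   # (action, state, log_prob, reward) tuples

    def add_mem(self, action, state, log_prob, reward):
        self.memory.append((action, state, log_prob, reward))

    def optimize_model(self, k, variance):
        # Policy-gradient step over the last k transitions: weight each stored
        # log-probability by its reward, then clear the buffer.
        batch = self.memory[-k:]
        loss = torch.stack([-log_prob * reward for _, _, log_prob, reward in batch]).sum()
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        self.memory.clear()

    def save_agent(self, file_name):
        torch.save({'state_dict': self.a2c.state_dict(),
                    'optimizer': self.optimizer.state_dict()}, file_name)

# In trainJump above, Agent would be an instance of a class like this one.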
def onDeath():
    time.sleep(1.9)
    if getScore() == -10:
        os.system("adb shell input tap 550 1700")
        time.sleep(0.5)
    else:
        # Sometimes a weird pop-up will come up; this closes it
        image = getImage()
        if (image[1000, 200] == [255, 255, 255]).all():
            os.system("adb shell input tap 200 1400")
            time.sleep(0.1)
        os.system("adb shell input tap 550 1700")
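
# trainJump and onDeath also assume screen-reading helpers (getImage, getScore,
# getReward, plus getLessProcessed / get_distance for building states) defined
# elsewhere. The versions below are illustrative stand-ins only: the screenshot
# path, crop regions, pixel checks, and the use of pytesseract for reading the
# score are assumptions, not the project's actual implementation.
import os

import cv2
import pytesseract

def getImage():
    # Pull a screenshot from the device over adb and load it as a BGR array.
    os.system("adb exec-out screencap -p > screen.png")
    return cv2.imread("screen.png")

def getScore():
    image = getImage()
    # Assumed heuristic: a white pixel on the restart button only appears on the
    # game-over screen, in which case -10 is returned as a sentinel.
    if (image[1700, 550] == [255, 255, 255]).all():
        return -10
    score_crop = image[150:320, 60:540]  # assumed crop around the score text
    text = pytesseract.image_to_string(score_crop, config="--psm 7 digits")
    try:
        return int(text.strip())
    except ValueError:
        return 0

def getReward(prev_score):
    # Reward is the change in score since the previous step; a detected death
    # (the -10 sentinel from getScore) is passed through so callers can branch
    # on a negative value.
    score = getScore()
    if score == -10:
        return -10
    return score - prev_score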
def trainJump(save, save_as=None, curr_checkpoint=None):
    model.train()
    global variance
    for episode in range(num_episodes):
        print("-----------------------------------------")
        print("Episode:", episode)

        # Get state
        state = get_distance()
        prev_score = getScore()
        print("Distance:", state)
        state = np.array([state])
        state = torch.from_numpy(state)
        state = state.float()

        # Calculate mean and variance
        mean = model(state)
        variance = final_variance + (initial_variance - final_variance) * \
            math.exp(-1. * episode / variance_decay)
        print("Mean:", float(mean), "Deviation:", float(variance))

        # Construct normal distribution based on the mean and variance and sample from it
        m = Normal(mean, variance)
        action = m.sample()

        # Perform action
        print("Action:", action)
        os.system("adb shell input swipe 500 500 500 500 " + str(int(action)))

        # Get reward and optimize model
        time.sleep(0.5)
        reward = getReward(prev_score)
        if reward >= 2:
            reward = 10
        elif reward == 1:
            reward = 1
        elif reward < 0:
            onDeath()
        print("Reward:", reward)
        loss = -m.log_prob(action) * reward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if save:
            if (episode + 1) % 501 == 0:
                save_file = {
                    'state_dict': model.state_dict(),
                    'optimizer': optimizer.state_dict(),
                }
                file_name = save_as + str((episode // 1000) + curr_checkpoint) + ".pth"
                torch.save(save_file, file_name)
def trainJump(save, save_as=None, curr_checkpoint=None): model.train() for episode in range(num_episodes): prev_score = getScore() print("-----------------------------------------") print("Episode:", episode) # Get state state = torch.Tensor(get_distance()) # Select action based off state node_activated, action = select_action(state) os.system("adb shell input swipe 500 500 500 500 " + str(action)) # Optimize model optimize_model() # Optimize here to save time # Get reward and push state, node_activated, and actual_reward to memory time.sleep(0.5) actual_reward = getReward(prev_score) print("Node Activated:", node_activated, "Action:", action) if actual_reward >= 2: actual_reward = 10 elif actual_reward < 0: onDeath() memory.push(state, node_activated, actual_reward) # Print difference between predicted and actual rewards predicted_reward = model(state).view(16)[node_activated] print("Predicted Reward:", float(predicted_reward), "Actual Reward:", actual_reward) print("-----------------------------------------") if save: if (episode + 1) % 1001 == 0: save_file = { 'state_dict': model.state_dict(), 'optimizer': optimizer.state_dict(), } file_name = save_as + str((episode // 1000) + curr_checkpoint) + ".pth" torch.save(save_file, file_name)
def trainJump(save, save_as=None, curr_checkpoint=None): model.train() for episode in range(num_episodes): prev_score = getScore() print("-----------------------------------------") print("Episode:", episode) # Get state and select action based on state state = torch.Tensor(getLessProcessed()).unsqueeze(0) state_shape = state.shape state = state.view(state_shape[0], state_shape[3], state_shape[1], state_shape[2]) node_activated, action = select_action(state) os.system("adb shell input swipe 500 500 500 500 " + str(action)) optimize_model() # Optimize here to save time # Get actual reward and push state, node_activated, and actual_reward to memory actual_reward = getReward(prev_score) print("Node Activated:", node_activated, "Action:", action) if actual_reward >= 2: actual_reward = 10 elif actual_reward < 0: onDeath() memory.push(state, node_activated, actual_reward) # Print predicted and actual rewards predicted_reward = model(state).view(16)[node_activated] print("Predicted Reward:", float(predicted_reward), "Actual Reward:", actual_reward) print("-----------------------------------------") if save: if (episode + 1) % 1001 == 0: save_file = { 'state_dict': model.state_dict(), 'optimizer': optimizer.state_dict(), } file_name = save_as + str((episode // 1000) + curr_checkpoint) + ".pth" torch.save(save_file, file_name)