import time
from random import random

import numpy as np
import torch
import wandb
from tqdm import tqdm

from environment.environment import Environment, Robot
# Model, ReplayBuffer, trainStep and updateTGTModel are defined elsewhere in
# this project; their exact modules are not shown in this excerpt.


def modelTest(test=False, chkpt=None, device='cuda'):
    """
    modelTest loads a saved model and tests out the results.

    :param test: bool, set to True for evaluation (render) mode
    :param chkpt: string, path of the saved model checkpoint
    :param device: 'cuda' or 'cpu'
    :return: None
    """
    # device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    if not test:
        wandb.init(project="MultiSection Continum",
                   name="Reaching Task 32 Per Layer")

    robot = Robot()
    robot.newSection()
    robot.newSection()
    env = Environment(robot)
    env.staticPoint([-75, 150])
    if test:
        env.render()

    lastObs = env.getObservation()

    rb = ReplayBuffer()
    minRBSize = 10000           # minimum transitions stored before training starts
    sampleSize = 2500           # minibatch size per training step
    envStepsBeforeTrain = 100   # environment steps between training steps
    targetModelUpdate = 150     # training steps between target-network syncs
    epsMin = 0.01               # floor for the decayed exploration rate
    epsDecay = 0.99998

    model = Model(len(lastObs.state), len(env.robot.actions))
    if chkpt is not None:
        model.load_state_dict(
            torch.load(chkpt, map_location=torch.device('cpu')))
    targetModel = Model(len(lastObs.state), len(env.robot.actions))
    # Copy the online network's weights into the target network
    updateTGTModel(model, targetModel)

    stepSinceTrain = 0      # steps since the online network was last trained
    stepSinceTGTUpdate = 0  # training steps since the target network was last synced
    stepNum = -1 * minRBSize
    episodeRewards = []
    rollingReward = 0

    tq = tqdm()
    while True:
        if test:
            env.render()
            time.sleep(0.05)
        tq.update(1)

        # Epsilon-greedy action selection; eps decays with stepNum but never
        # drops below epsMin, and is forced to 0 when evaluating
        eps = max(epsMin, epsDecay**(stepNum / 100))
        if test:
            eps = 0

        if random() < eps:
            action = env.robot.randomAction()
        else:
            actNum = model(torch.tensor(lastObs.state)).max(-1)[-1].item()
            action = env.robot.actions[actNum]

        obs = env.robotStep(action[0], action[1])
        rollingReward = obs.reward
        episodeRewards.append(rollingReward)  # track per-step rewards for logging

        obs.reward = obs.reward / 100  # rescale the reward before storing it
        stepSinceTrain += 1
        stepNum += 1
        rb.insert(obs)
        lastObs = obs  # the new observation is the state for the next action

        if (not test) and len(rb.buffer) >= minRBSize \
                and stepSinceTrain > envStepsBeforeTrain:
            stepSinceTGTUpdate += 1
            loss = trainStep(rb.sample(sampleSize), model, targetModel,
                             len(env.robot.actions), device)
            wandb.log(
                {
                    "Loss": loss.detach().item(),
                    "eps": eps,
                    "Step Rewards": np.mean(episodeRewards)
                },
                step=stepNum)
            stepSinceTrain = 0

            if stepSinceTGTUpdate > targetModelUpdate:
                print("Updating Target Model")
                updateTGTModel(model, targetModel)
                stepSinceTGTUpdate = 0
                torch.save(targetModel.state_dict(), f"Models/{stepNum}.pth")
                episodeRewards = []
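# ReplayBuffer is defined elsewhere in this project. Below is a minimal sketch
# that is consistent with how it is used in this excerpt (rb.insert(obs),
# rb.sample(n), len(rb.buffer) in modelTest, rb.index in main); the default
# capacity mirrors the memorySize constant in main. This is an assumption
# about the real implementation, not a copy of it.
from random import sample

class ReplayBuffer:
    def __init__(self, bufferSize=500000):
        self.bufferSize = bufferSize
        self.buffer = []  # grows until full, then old transitions are overwritten
        self.index = 0    # total number of insertions so far

    def insert(self, obs):
        if len(self.buffer) < self.bufferSize:
            self.buffer.append(obs)
        else:
            # Overwrite the oldest slot once the buffer is full
            self.buffer[self.index % self.bufferSize] = obs
        self.index += 1

    def sample(self, numSamples):
        # Uniform sampling without replacement from stored transitions
        return sample(self.buffer, numSamples)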
from environment.environment import section, Environment, Observation, Robot

if __name__ == '__main__':
    # Manual control loop: drive the simulated robot from the console,
    # one section at a time, and print the resulting observations
    robot = Robot()
    robot.newSection()
    robot.newSection()
    env = Environment(robot)
    # env.staticPoint([-9.966711079379195, 99.3346653975306])
    env.render()
    while True:
        secNum = int(input("Enter SecNum: "))
        direction = input("Enter direction: ")
        steps = int(input("Enter number of steps: "))
        for i in range(steps):
            obs = env.robotStep(secNum, direction)
            print(env.robot.endEffectorPos())
            print(obs)
            env.render()
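# Observation is imported from environment.environment; its definition is not
# shown in this excerpt. Judging from how it is used (obs.state feeds the
# network, obs.reward is logged and rescaled, observations are stored whole in
# the replay buffer), it plausibly looks like the sketch below. The action,
# nextState and done fields are assumptions, added because a DQN training step
# needs them; the real class may differ.
from dataclasses import dataclass
import numpy as np

@dataclass
class Observation:
    state: np.ndarray             # flattened robot + target state fed to the model
    reward: float                 # step reward from the environment
    action: int = 0               # index into env.robot.actions (assumed)
    nextState: np.ndarray = None  # successor state (assumed)
    done: bool = False            # whether the episode terminated (assumed)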
def main(test=False, chkpt=None, device='cuda'):
    """
    main starts and performs the training in non-render mode.

    :param test: bool, optional; set to True for evaluation (render) mode
    :param chkpt: string, optional; path of a checkpoint to resume from
    :param device: string ('cuda' or 'cpu')
    :return: None
    """
    # device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    if not test:
        wandb.init(project="MultiSection Continum",
                   name="Reaching Task 32 Per Layer")

    robot = Robot()
    robot.newSection()
    robot.newSection()
    env = Environment(robot)
    if test:
        # env.staticPoint([-9.966711079379195, 99.3346653975306])
        env.render()

    lastObs = env.getObservation()

    rb = ReplayBuffer()
    memorySize = 500000         # intended replay-buffer capacity (not passed to ReplayBuffer here)
    minRBSize = 20000           # minimum transitions stored before training starts
    sampleSize = 750            # minibatch size per training step
    envStepsBeforeTrain = 250   # environment steps between training steps
    targetModelUpdate = 500     # training steps between target-network syncs
    epsMin = 0.01               # floor for the decayed exploration rate
    epsDecay = 0.99999

    model = Model(len(lastObs.state), len(env.robot.actions)).to(device)
    if chkpt is not None:
        model.load_state_dict(torch.load(chkpt))
    targetModel = Model(len(lastObs.state), len(env.robot.actions)).to(device)
    # Copy the online network's weights into the target network
    updateTGTModel(model, targetModel)

    stepSinceTrain = 0      # steps since the main network was last trained;
                            # it trains after every envStepsBeforeTrain steps
    stepSinceTGTUpdate = 0  # training steps since the target network last received
                            # the main network's weights; it syncs after every
                            # targetModelUpdate training steps
    stepNum = -1 * minRBSize
    episodeRewards = []
    rollingReward = 0

    tq = tqdm()
    while True:
        if test:
            env.render()
            time.sleep(0.05)
        tq.update(1)

        # Epsilon-greedy action selection; eps decays with stepNum but never
        # drops below epsMin, and is forced to 0 when evaluating
        eps = max(epsMin, epsDecay**(stepNum / 10))
        if test:
            eps = 0

        if random() < eps:
            action = env.robot.randomAction()
        else:
            actNum = model(torch.tensor(
                lastObs.state).to(device)).max(-1)[-1].item()
            action = env.robot.actions[actNum]

        obs = env.robotStep(action[0], action[1])
        rollingReward = obs.reward
        episodeRewards.append(rollingReward)  # track per-step rewards for logging

        if env.done():
            env.reset()
            # env.staticPoint([-9.966711079379195, 99.3346653975306])

        # obs.reward = obs.reward / 100
        stepSinceTrain += 1
        stepNum += 1
        rb.insert(obs)
        lastObs = obs  # the new observation is the state for the next action

        if (not test) and rb.index >= minRBSize \
                and stepSinceTrain > envStepsBeforeTrain:
            stepSinceTGTUpdate += 1
            loss = trainStep(rb.sample(sampleSize), model, targetModel,
                             len(env.robot.actions), device)
            wandb.log(
                {
                    "Loss": loss.detach().cpu().item(),
                    "eps": eps,
                    "Step Rewards": np.mean(episodeRewards)
                },
                step=stepNum)
            stepSinceTrain = 0

            if stepSinceTGTUpdate > targetModelUpdate:
                print("Updating Target Model")
                updateTGTModel(model, targetModel)
                stepSinceTGTUpdate = 0
                torch.save(
                    targetModel.state_dict(),
                    f"/u/meharabd/research/CRLMachineLearningProject/Models/{stepNum}.pth"
                )
                episodeRewards = []
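# trainStep and updateTGTModel are defined elsewhere in this project. Below is
# a minimal sketch of both, consistent with the call sites above: a standard
# DQN temporal-difference update against a periodically synced target network.
# The transition field names, the 0.99 discount factor, and an optimizer
# attached to the model as model.opt are all assumptions; the real
# implementations may differ.
import torch
import torch.nn.functional as F

def updateTGTModel(model, targetModel):
    # Overwrite the target network's parameters with the online network's
    targetModel.load_state_dict(model.state_dict())

def trainStep(batch, model, targetModel, numActions, device):
    states = torch.stack([torch.tensor(o.state, dtype=torch.float32)
                          for o in batch]).to(device)
    actions = torch.tensor([o.action for o in batch],
                           dtype=torch.int64).to(device)
    rewards = torch.tensor([o.reward for o in batch],
                           dtype=torch.float32).to(device)
    nextStates = torch.stack([torch.tensor(o.nextState, dtype=torch.float32)
                              for o in batch]).to(device)
    notDone = torch.tensor([0.0 if o.done else 1.0
                            for o in batch]).to(device)

    with torch.no_grad():
        # TD target: r + gamma * max_a' Q_target(s', a'), zeroed at terminals
        target = rewards + 0.99 * notDone * targetModel(nextStates).max(-1)[0]

    qvals = model(states)  # shape (batch, numActions)
    # Pick out Q(s, a) for the action actually taken via a one-hot mask
    chosen = (qvals * F.one_hot(actions, numActions)).sum(-1)

    loss = F.smooth_l1_loss(chosen, target)
    model.opt.zero_grad()
    loss.backward()
    model.opt.step()
    return loss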