def goal_seeking(goals_to_reach):
    """Steer the simulated robot to successive goals, filtering actions through the model.

    Each iteration of the main loop either registers a reached goal, turns the
    robot around because every candidate action was rejected, or executes the
    admissible action closest to the one requested by the steering behavior.

    Args:
        goals_to_reach: Number of goals to reach before the function returns.

    Returns:
        None. The function only drives the simulation and prints predictions.
    """
    sim_env = sim.SimulationEnvironment()
    action_repeat = 100  # max simulation steps spent on one chosen action
    # Alternative behavior kept from the original: Wander(action_repeat)
    steering_behavior = Seek(sim_env.goal_body.position)

    # Load the trained model and switch it to evaluation mode.
    model = Action_Conditioned_FF()
    model.load_state_dict(torch.load('saved/saved_model.pkl'))
    model.eval()

    # Load normalization parameters.
    # NOTE: pickle can execute arbitrary code while loading; only point this
    # at scaler files produced by a trusted training run.
    with open("saved/scaler.pkl", "rb") as scaler_file:
        scaler = pickle.load(scaler_file)

    # Candidate actions -5..5; invariant, so built once outside the loop.
    action_space = np.arange(-5, 6)

    goals_reached = 0
    while goals_reached < goals_to_reach:
        # A goal counts as reached once the robot is within 50 units of it.
        seek_vector = sim_env.goal_body.position - sim_env.robot.body.position
        if la.norm(seek_vector) < 50:
            sim_env.move_goal()
            steering_behavior.update_goal(sim_env.goal_body.position)
            goals_reached += 1
            continue

        # Keep only the actions whose model output is below the 0.25 cutoff.
        # NOTE(review): train_model in this file optimizes BCEWithLogitsLoss;
        # if the model returns raw logits this cutoff is not a probability.
        # Confirm against Action_Conditioned_FF.forward.
        actions_available = []
        for candidate in action_space:
            network_param = get_network_param(sim_env, candidate, scaler)
            prediction = model(network_param)
            print(prediction)
            if prediction.item() < 0.25:
                actions_available.append(candidate)

        if not actions_available:
            # Every candidate was rejected: reverse and re-evaluate.
            sim_env.turn_robot_around()
            continue

        desired_action, _ = steering_behavior.get_action(
            sim_env.robot.body.position, sim_env.robot.body.angle)

        # Admissible action nearest to the desired one. The builtin returns
        # the first minimum, matching the previous strict-less-than scan.
        closest_action = min(
            actions_available,
            key=lambda candidate: abs(desired_action - candidate),
        )

        steering_force = steering_behavior.get_steering_force(
            closest_action, sim_env.robot.body.angle)
        for _ in range(action_repeat):
            _, collision, _ = sim_env.step(steering_force)
            if collision:
                # Abandon the current action as soon as a collision is reported.
                steering_behavior.reset_action()
                break
def train_model(no_epochs):
    """Train an Action_Conditioned_FF model and checkpoint it after every epoch.

    Uses SGD (learning rate 0.1) with BCEWithLogitsLoss on batches of 256.
    After each epoch the test loss is printed and the model's state dict is
    written to ``saved/weights/weights_<test_loss>.pkl``.

    Args:
        no_epochs: Number of passes over the training loader.

    Returns:
        None.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print(device)

    data_loaders = Data_Loaders(256)
    model = Action_Conditioned_FF()
    model.to(device)
    loss_function = nn.BCEWithLogitsLoss()

    # Baseline test loss before any training step.
    # NOTE(review): this call happens before any model.eval()/no_grad() in
    # this function; confirm evaluate() handles that itself if it matters.
    loss_history = [
        model.evaluate(model, data_loaders.test_loader, loss_function)
    ]

    optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

    for epoch_i in range(no_epochs):
        model.train()
        running_loss = 0
        # Each batch is a mapping with 'input' and 'label' entries.
        for batch in data_loaders.train_loader:
            features = batch['input'].to(device)
            # Add a trailing dimension to the labels before computing the loss.
            targets = batch['label'].to(device).unsqueeze(1)

            optimizer.zero_grad()
            batch_loss = loss_function(model(features), targets)
            batch_loss.backward()
            optimizer.step()

            running_loss += batch_loss.item()

        batch_count = len(data_loaders.train_loader)
        print(f'| Epoch: {epoch_i+1}', end=" | ")
        print(f'Loss: {running_loss/batch_count:.4f} |')

        # Evaluate on the test loader, then checkpoint under a loss-tagged name.
        model.eval()
        with torch.no_grad():
            test_loss = model.evaluate(
                model, data_loaders.test_loader, loss_function)
            print(f'------- Test Loss: {test_loss:.4f} -------')
            loss_history.append(test_loss)

            checkpoint_path = f"saved/weights/weights_{test_loss:.3f}.pkl"
            torch.save(
                model.state_dict(),
                checkpoint_path,
                _use_new_zipfile_serialization=False,
            )