def __init__(self, data_path=None):
    """Load the Omniglot data found under *data_path* into Dataset splits.

    Parameters
    ----------
    data_path : str, optional
        Directory holding (or to receive) the Omniglot data.
        NOTE(review): the default of None will make os.makedirs raise —
        confirm callers always supply a path.
    """
    self._data_path = data_path
    # exist_ok=True creates the directory idempotently, removing the
    # check-then-create race of the former exists()+makedirs() pair.
    # (self.data_path is presumably a property over _data_path — it is
    # defined outside this view.)
    os.makedirs(self.data_path, exist_ok=True)
    omniglot_data = self.read_omniglot_data(self.data_path)
    # Only training images are available here; labels and the other
    # splits are deliberately empty.
    self._train = Dataset(omniglot_data['x_train'], None)
    self._test = Dataset(None, None)
    self._validation = Dataset(None, None)
    # 28x28 single-channel images.
    self._image_dim = 28 * 28 * 1
    self._image_shape = (28, 28, 1)
def __init__(self, data_path=None):
    """Load the CelebA train/test/validation splits from *data_path*.

    Parameters
    ----------
    data_path : str, optional
        Directory holding (or to receive) the CelebA data.
        NOTE(review): the default of None will make os.makedirs raise —
        confirm callers always supply a path.
    """
    self._data_path = data_path
    # exist_ok=True creates the directory idempotently, removing the
    # check-then-create race of the former exists()+makedirs() pair.
    # (self.data_path is presumably a property over _data_path — it is
    # defined outside this view.)
    os.makedirs(self.data_path, exist_ok=True)
    celebA_data = self.read_celebA_data(self.data_path)
    # All three splits carry both images and labels.
    self._train = Dataset(celebA_data['x_train'], celebA_data['y_train'])
    self._test = Dataset(celebA_data['x_test'], celebA_data['y_test'])
    self._validation = Dataset(celebA_data['x_val'], celebA_data['y_val'])
    # 32x32 RGB images.
    self._image_dim = 32 * 32 * 3
    self._image_shape = (32, 32, 3)
def __init__(self, data_path=None):
    """Load MNIST via the TensorFlow input_data helper into Dataset splits.

    Parameters
    ----------
    data_path : str, optional
        Directory holding (or to receive) the MNIST data files.
        NOTE(review): the default of None will make os.makedirs raise —
        confirm callers always supply a path.
    """
    self._data_path = data_path
    # exist_ok=True creates the directory idempotently, removing the
    # check-then-create race of the former exists()+makedirs() pair.
    # (self.data_path is presumably a property over _data_path — it is
    # defined outside this view.)
    os.makedirs(self.data_path, exist_ok=True)
    # read_data_sets downloads the files if absent and returns
    # train/test/validation namespaces with .images and .labels.
    data_from_tf = mnist.input_data.read_data_sets(self.data_path)
    self._train = Dataset(data_from_tf.train.images,
                          data_from_tf.train.labels)
    self._test = Dataset(data_from_tf.test.images,
                         data_from_tf.test.labels)
    self._validation = Dataset(data_from_tf.validation.images,
                               data_from_tf.validation.labels)
    # Flattened 28x28 grayscale images (no channel factor in the dim).
    self._image_dim = 28 * 28
    self._image_shape = (28, 28, 1)
def train(self, update_critic, update_actor):
    """Run one training step: fit the critic(s) on a replay batch,
    optionally step the actor and target networks, refresh PER
    priorities, and run discriminator updates.

    NOTE(review): the `update_critic` flag is accepted but never read —
    the critic is updated unconditionally. Confirm whether gating was
    intended.

    Returns a `(losses, gradnorms)` pair of dicts keyed by network name.
    """
    # Get a batch of transitions from the replay buffer
    if self.hps.n_step_returns:
        # n-step lookahead sampling also yields per-sample 'td_len'.
        batch = self.replay_buffer.lookahead_sample(self.hps.batch_size,
                                                    n=self.hps.lookahead,
                                                    gamma=self.hps.gamma)
    else:
        batch = self.replay_buffer.sample(self.hps.batch_size)
    if not self.hps.pixels:
        # Standardize low-dimensional observations with running stats,
        # then clip to +/-5 to bound outliers.
        batch['obs0'] = ((batch['obs0'] - self.rms_obs.mean) /
                         (np.sqrt(self.rms_obs.var) + 1e-8))
        batch['obs0'] = np.clip(batch['obs0'], -5.0, 5.0)
    # Create tensors from the inputs
    state = torch.FloatTensor(batch['obs0']).to(self.device)
    action = torch.FloatTensor(batch['acs']).to(self.device)
    next_state = torch.FloatTensor(batch['obs1']).to(self.device)
    reward = torch.FloatTensor(batch['rews']).to(self.device)
    done = torch.FloatTensor(batch['dones1'].astype('float32')).to(
        self.device)
    if self.hps.prioritized_replay:
        # Importance weights for prioritized experience replay.
        iws = torch.FloatTensor(batch['iws']).to(self.device)
    if self.hps.n_step_returns:
        td_len = torch.FloatTensor(batch['td_len']).to(self.device)
    else:
        # 1-step returns: effective TD length is 1 everywhere.
        td_len = torch.ones_like(done).to(self.device)
    if self.hps.enable_targ_actor_smoothing:
        # TD3 target-policy smoothing: clipped Gaussian noise on the
        # target action, result clamped to the action bounds.
        n_ = action.clone().detach().data.normal_(0, self.hps.td3_std).to(
            self.device)
        n_ = n_.clamp(-self.hps.td3_c, self.hps.td3_c)
        next_action = (self.targ_actor(next_state) + n_).clamp(
            -self.max_ac, self.max_ac)
    else:
        next_action = self.targ_actor(next_state)
    # Create data loaders
    dataset = Dataset(batch)
    dataloader = DataLoader(dataset, self.hps.batch_size,
                            shuffle=True)  # Iterable over 1 element, but cleaner that way
    # Collect recent pairs uniformly from the experience replay buffer
    window = 128  # HAXX: hard-coded recency window
    assert window >= self.hps.batch_size, "must have window >= batch_size"
    recent_batch = self.replay_buffer.sample_recent(
        self.hps.batch_size, window)
    recent_dataset = Dataset(recent_batch)
    recent_dataloader = DataLoader(recent_dataset, self.hps.batch_size,
                                   shuffle=True)  # Iterable over 1 element, but cleaner that way
    # Compute losses
    # Compute Q estimate
    q = self.critic(state, action)
    if self.hps.enable_clipped_double:
        twin_q = self.twin_critic(state, action)
    # Compute target Q estimate
    q_prime = self.targ_critic(next_state, next_action)
    if self.hps.enable_clipped_double:
        # Define Q' as the minimum Q value between TD3's twin Q's
        twin_q_prime = self.targ_twin_critic(next_state, next_action)
        q_prime = torch.min(q_prime, twin_q_prime)
    # Bellman target; gamma**td_len accounts for n-step lookahead, and
    # the target is detached so no gradient flows into the target nets.
    targ_q = reward + (self.hps.gamma ** td_len) * (1. - done) * q_prime.detach()
    # Critic loss (Huber / smooth L1, kept per-sample for PER weighting)
    huber_td_errors = F.smooth_l1_loss(q, targ_q, reduction='none')
    if self.hps.enable_clipped_double:
        twin_huber_td_errors = F.smooth_l1_loss(twin_q, targ_q,
                                                reduction='none')
    if self.hps.prioritized_replay:
        # Adjust with importance weights
        huber_td_errors *= iws
        if self.hps.enable_clipped_double:
            twin_huber_td_errors *= iws
    critic_loss = huber_td_errors.mean()
    if self.hps.enable_clipped_double:
        twin_critic_loss = twin_huber_td_errors.mean()
    # Actor loss: deterministic policy gradient through the (first) critic
    actor_loss = -self.critic(state, self.actor(state)).mean()
    # Actor grads (computed every step; the optimizer step below is gated)
    self.actor_optimizer.zero_grad()
    actor_loss.backward()
    actor_gradnorm = U.clip_grad_norm_(self.actor.parameters(),
                                       self.hps.clip_norm)
    # Critic(s) grads
    self.critic_optimizer.zero_grad()
    if self.hps.enable_clipped_double:
        self.twin_critic_optimizer.zero_grad()
    critic_loss.backward()
    critic_gradnorm = U.clip_grad_norm_(self.critic.parameters(),
                                        self.hps.clip_norm)
    if self.hps.enable_clipped_double:
        twin_critic_loss.backward()
        twin_critic_gradnorm = U.clip_grad_norm_(
            self.twin_critic.parameters(), self.hps.clip_norm)
    # Update critic(s); average_gradients suggests gradients are synced
    # across distributed workers before stepping — confirm in its def.
    average_gradients(self.critic, self.device)
    self.critic_optimizer.step()
    if self.hps.enable_clipped_double:
        average_gradients(self.twin_critic, self.device)
        self.twin_critic_optimizer.step()
    if update_actor:
        # Update actor (delayed policy update, TD3-style)
        average_gradients(self.actor, self.device)
        self.actor_optimizer.step()
        # Update target nets
        self.update_target_net()
    if self.hps.prioritized_replay:
        # Update priorities from the (signed) TD errors; with clipped
        # double Q, take the elementwise min of the two errors.
        td_errors = q - targ_q
        if self.hps.enable_clipped_double:
            td_errors = torch.min(q - targ_q, twin_q - targ_q)
        new_priorities = np.abs(
            td_errors.detach().cpu().numpy()) + 1e-6  # epsilon from paper
        self.replay_buffer.update_priorities(batch['idxs'], new_priorities)
    # Discriminator updates: pair policy chunks (uniform and recent) with
    # expert chunks from self.e_dataloader.
    for _ in range(self.hps.d_update_ratio):
        for chunk, e_chunk in zip(dataloader, self.e_dataloader):
            self.update_discriminator(chunk, e_chunk)
        for chunk, e_chunk in zip(recent_dataloader, self.e_dataloader):
            self.update_discriminator(chunk, e_chunk)
    # Aggregate the elements to return
    losses = {
        'actor': actor_loss.clone().cpu().data.numpy(),
        'critic': critic_loss.clone().cpu().data.numpy()
    }
    gradnorms = {'actor': actor_gradnorm, 'critic': critic_gradnorm}
    if self.hps.enable_clipped_double:
        losses.update(
            {'twin_critic': twin_critic_loss.clone().cpu().data.numpy()})
        gradnorms.update({'twin_critic': twin_critic_gradnorm})
    return losses, gradnorms
for dimension in train_matrix: temp_array = DataFormatter.flatten_structure_to_one_dim_structure(dimension) my_vals.append(np.array(temp_array)) train_vectors = np.array(my_vals) my_vals = [] for dimension in test_matrix: temp_array = DataFormatter.flatten_structure_to_one_dim_structure(dimension) my_vals.append(np.array(temp_array)) test_vectors = np.array(my_vals) train_tensor = torch.from_numpy(train_vectors) test_tensor = torch.from_numpy(test_vectors) # train_dataset = Dataset(train_tensor, training_targets) test_dataset = Dataset(test_tensor, test_targets) model = OtherLstm(726, 50, batch_size=BATCH_SIZE, output_dim=1, num_layers=2) loss_fn = torch.nn.MSELoss() optimiser = torch.optim.Adam(model.parameters(), lr=0.01) NUM_EPOCS = 100 hist = np.zeros(NUM_EPOCS) for t in range(NUM_EPOCS): model.zero_grad() current_epoc_loss = 0 train_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE,
plot_regression_surface(
    model,
    normalizer,
    avg_realization.training_set + avg_realization.test_set,
    x_label="X",
    y_label="Y",
    scatter_label='Base de dados',
    model_label='Predição do modelo',
    title='Artificial I')

# One-off dataset generation (artificial 1 and 2):
# generate_datasets()
# Artificial 1:
# dataset = Dataset('assignment2/datasets/artificial1.csv')
# Artificial 2:
dataset = Dataset('assignment2/datasets/artificial2.csv')

# Training hyper-parameters.
epochs = 100
ratio = 0.8
learning_rate = 0.01

# Fit an Adaline with early stopping and evaluate it over 20 realizations.
model = Adaline(epochs=epochs,
                learning_rate=learning_rate,
                early_stopping=True,
                verbose=False)
evaluate(model, dataset.load(), ratio=ratio, num_realizations=20)
print("Done!")
}) # Iris dataset iris_encodings = [ { 'Iris-setosa': 0 }, # Binary: 0 - Setosa, 1 - Others { 'Iris-versicolor': 0 }, # Binary: 0 - Virginica, 1 - Others { 'Iris-virginica': 0 }, # Binary: 0 - Versicolor, 1 - Others ] dataset = Dataset('assignment1/datasets/iris.csv', encoding=iris_encodings[0]) # dataset = Dataset('assignment1/datasets/artificial.csv') draw_decision_surface = "artificial" in dataset.filename learning_rate = 0.01 ratio = 0.8 epochs = 100 # Setosa, virginica, artificial # epochs = 10 # Versicolor # select_training_hyper_parameters(dataset) model = Perceptron(epochs=epochs, learning_rate=learning_rate, early_stopping=True,