def select_actions(self, states, explore_noise=0.0):
    """Compute actions for a batch of states and return them as a NumPy array.

    Args:
        states: Array-like batch of observations. It is converted to a
            float32 tensor on ``self.device``.
        explore_noise: When non-zero, actions are sampled from
            ``Normal(*self.act.actor(states))``; when zero they are taken
            directly from ``self.act(states)``. Only the zero / non-zero
            distinction is used by this method.

    Returns:
        numpy.ndarray: The tanh-squashed actions, moved back to the CPU.
    """
    # CPU array to GPU tensor to CPU array
    states = torch.tensor(states, dtype=torch.float32, device=self.device)

    # The result leaves as a NumPy array and is never back-propagated, so
    # there is no reason to record an autograd graph.
    with torch.no_grad():
        if explore_noise != 0.0:
            # Stochastic policy: sample from the distribution whose
            # parameters are produced by the actor head.
            pis = self.act.actor(states)
            actions = Normal(*pis).sample()
        else:
            # Deterministic policy. Evaluated only on this path so the
            # exploration path does not pay for a discarded forward pass.
            actions = self.act(states)

        # NOTE(review): the source formatting was lost; the squashing is
        # assumed to apply to both paths -- confirm against the policy's
        # forward(), which must then return un-squashed values.
        actions = actions.tanh()

    # .detach() is the supported replacement for the legacy .data attribute.
    return actions.detach().cpu().numpy()
# The script prefers GPU 3. torch.cuda.device() is a no-op for a negative
# index, so hosts without CUDA (device_count() == 0) or with fewer than four
# GPUs keep their default device instead of failing when the context is
# entered -- which is what lets the CPU fallback below actually run.
with torch.cuda.device(3 if torch.cuda.device_count() > 3 else -1):
    # Do it with CUDA if possible.
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    if device == 'cuda':
        # Move the three networks to the GPU selected by the context.
        for net in (encd, decd, disc):
            net.cuda()

    if len(sys.argv) > 1:
        # Sampling mode (any command-line argument present): push every
        # batch through encoder and decoder, draw one sample per row from
        # the resulting Normal, print it to stdout, then quit without
        # training.
        for batch in data.batches():
            batch = batch.to(device)
            with torch.no_grad():
                mu, sd = decd(encd(batch))
                y = Normal(mu, sd).sample()
            np.savetxt(sys.stdout, y.cpu().numpy(), fmt='%.4f')
        sys.exit()

    lr = 0.001  # The celebrated learning rate

    # One Adam optimizer per network, all sharing the same learning rate.
    aopt = torch.optim.Adam(encd.parameters(), lr=lr)
    bopt = torch.optim.Adam(decd.parameters(), lr=lr)
    copt = torch.optim.Adam(disc.parameters(), lr=lr)

    # Matching schedulers with a single milestone at step 800
    # (MultiStepLR's default decay factor is used).
    asched = torch.optim.lr_scheduler.MultiStepLR(aopt, [800])
    bsched = torch.optim.lr_scheduler.MultiStepLR(bopt, [800])
    csched = torch.optim.lr_scheduler.MultiStepLR(copt, [800])

    # (Binary) cross-entropy loss.
    loss_clsf = nn.BCELoss(reduction='mean')