def __init__(self, test=False):
    """Build the policy-gradient agent: network, optimizer, and episode memory.

    Args:
        test: when True, restore pretrained weights from './pg_best.cpt'.
    """
    # Prefer GPU when one is present, otherwise run on CPU.
    on_gpu = torch.cuda.is_available()
    self.device = torch.device('cuda' if on_gpu else 'cpu')

    # Policy network: 4-dim observation -> distribution over 2 actions.
    self.model = MLP(state_dim=4, action_num=2, hidden_dim=256).to(self.device)
    if test:
        self.load('./pg_best.cpt')

    # Discount factor applied to future rewards.
    self.gamma = 0.99
    # Adam over all policy parameters.
    self.optimizer = torch.optim.Adam(self.model.parameters(), lr=3e-3)
    # Per-episode buffer of rewards, log-probs, states and actions.
    self.memory = Memory()
    self.tensorboard = TensorboardLogger('./')
class Agent():
    """REINFORCE (vanilla policy-gradient) agent for a 2-action, 4-dim-state
    environment (sizes match CartPole — TODO confirm against the caller).

    Collects log-probs/rewards over an episode in ``self.memory`` and updates
    the policy with discounted, standardized returns in :meth:`update`.
    """

    def __init__(self, test=False):
        """Build the policy network, optimizer, and episode memory.

        Args:
            test: when True, load pretrained weights from './pg_best.cpt'.
        """
        # device
        if torch.cuda.is_available():
            self.device = torch.device('cuda')
        else :
            self.device = torch.device('cpu')
        self.model = MLP(state_dim=4,action_num=2,hidden_dim=256).to(self.device)
        if test:
            self.load('./pg_best.cpt')
        # discounted reward
        self.gamma = 0.99
        # optimizer
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=3e-3)
        # saved rewards and actions
        self.memory = Memory()
        self.tensorboard = TensorboardLogger('./')

    def save(self, save_path):
        """Write the policy network's state_dict to ``save_path``."""
        print('save model to', save_path)
        torch.save(self.model.state_dict(), save_path)

    def load(self, load_path):
        """Restore the policy network's state_dict from ``load_path``."""
        print('load model from', load_path)
        self.model.load_state_dict(torch.load(load_path))

    def act(self,x,test=False):
        """Sample an action for observation ``x`` (a numpy array).

        In training mode the sampled action's log-prob is appended to
        ``self.memory.logprobs`` so :meth:`update` can build the loss; the
        append order must match the reward order in ``collect_data``.
        In test mode sampling is still stochastic (the argmax variant is
        commented out) but runs under ``no_grad`` with the model in eval mode.

        Returns:
            int: the chosen action index.
        """
        if not test:
            # boring type casting
            x = ((torch.from_numpy(x)).unsqueeze(0)).float().to(self.device)
            # stochastic sample
            action_prob = self.model(x)
            dist = torch.distributions.Categorical(action_prob)
            action = dist.sample()
            # memory log_prob
            self.memory.logprobs.append(dist.log_prob(action))
            return action.item()
        else :
            self.model.eval()
            x = ((torch.from_numpy(x)).unsqueeze(0)).float().to(self.device)
            with torch.no_grad():
                action_prob = self.model(x)
            # a = np.argmax(action_prob.cpu().numpy())
            dist = torch.distributions.Categorical(action_prob)
            action = dist.sample()
            return action.item()

    def collect_data(self, state, action, reward):
        """Record one transition; rewards are stored as 0-dim tensors."""
        self.memory.actions.append(action)
        self.memory.rewards.append(torch.tensor(reward))
        self.memory.states.append(state)

    def clear_data(self):
        """Reset the episode memory (delegates to Memory.clear_memory)."""
        self.memory.clear_memory()

    def update(self):
        """One REINFORCE update from the transitions stored in ``self.memory``.

        Computes discounted returns right-to-left, standardizes them, and
        minimizes sum(-log_prob * return). Caller is expected to invoke
        ``clear_data`` afterwards — this method does not clear the memory.
        """
        R = 0
        advantage_function = []
        # Walk rewards backwards so R accumulates the discounted tail return;
        # insert(0, ...) keeps the list aligned with memory.logprobs order.
        for t in reversed(range(0, len(self.memory.rewards))):
            R = R * self.gamma + self.memory.rewards[t]
            advantage_function.insert(0, R)
        # turn rewards to pytorch tensor and standardize
        advantage_function = torch.Tensor(advantage_function).to(self.device)
        # eps guards against division by zero on constant-reward episodes.
        advantage_function = (advantage_function - advantage_function.mean()) / (advantage_function.std() + np.finfo(np.float32).eps)
        policy_loss = []
        # Pair each stored log-prob with its standardized return.
        for log_prob, reward in zip(self.memory.logprobs, advantage_function):
            policy_loss.append(-log_prob * reward)
        # Update network weights
        self.optimizer.zero_grad()
        loss = torch.cat(policy_loss).sum()
        loss.backward()
        self.optimizer.step()
        # boring log
        self.tensorboard.scalar_summary("loss", loss.item())
        self.tensorboard.update()
def train(opts):
    """Train a prototype classifier-autoencoder (CAE) per the options in ``opts``.

    Selects architecture and dataset from ``opts.arch`` / ``opts.mode``,
    optionally resumes from 'model_latest.net', then runs the epoch loop:
    per-batch optimization, per-epoch validation, and checkpointing of both
    the best-so-far and latest model (with prototype/AE images to tensorboard).

    Raises:
        NotImplementedError: on unknown arch, mode, or optimizer name.
    """
    # NOTE(review): `use_cuda` is not defined in this function — presumably a
    # module-level flag; confirm it exists at import time.
    device = torch.device("cuda" if use_cuda else "cpu")
    # Decoder channel widths per architecture size.
    if opts.arch == 'small':
        channels = [32, 32, 32, 10]
    elif opts.arch == 'large':
        channels = [256, 128, 64, 32]
    else:
        raise NotImplementedError('Unknown model architecture')
    # Dataset selection fixes the CAE's input channels / class count / image
    # size: MNIST and Fashion-MNIST are 1x28x28, CIFAR is 3x32x32.
    if opts.mode == 'train_mnist':
        train_loader, valid_loader = get_mnist_loaders(opts.data_dir, opts.bsize, opts.nworkers, opts.sigma, opts.alpha)
        model = CAE(1, 10, 28, opts.n_prototypes, opts.decoder_arch, channels)
    elif opts.mode == 'train_cifar':
        train_loader, valid_loader = get_cifar_loaders(opts.data_dir, opts.bsize, opts.nworkers, opts.sigma, opts.alpha)
        model = CAE(3, 10, 32, opts.n_prototypes, opts.decoder_arch, channels)
    elif opts.mode == 'train_fmnist':
        train_loader, valid_loader = get_fmnist_loaders( opts.data_dir, opts.bsize, opts.nworkers, opts.sigma, opts.alpha)
        model = CAE(1, 10, 28, opts.n_prototypes, opts.decoder_arch, channels)
    else:
        raise NotImplementedError('Unknown train mode')
    if opts.optim == 'adam':
        optimizer = torch.optim.Adam(model.parameters(), lr=opts.lr, weight_decay=opts.wd)
    else:
        raise NotImplementedError("Unknown optim type")
    criterion = nn.CrossEntropyLoss()
    start_n_iter = 0
    # for choosing the best model
    best_val_acc = 0.0
    model_path = os.path.join(opts.save_path, 'model_latest.net')
    if opts.resume and os.path.exists(model_path):
        # restoring training from save_state
        print('====> Resuming training from previous checkpoint')
        save_state = torch.load(model_path, map_location='cpu')
        model.load_state_dict(save_state['state_dict'])
        start_n_iter = save_state['n_iter']
        best_val_acc = save_state['best_val_acc']
        # NOTE(review): the checkpointed opts replace the CLI opts entirely,
        # so any flags passed for this run (other than resume itself) are
        # discarded — confirm this is intentional.
        opts = save_state['opts']
        opts.start_epoch = save_state['epoch'] + 1
    model = model.to(device)
    # for logging
    logger = TensorboardLogger(opts.start_epoch, opts.log_iter, opts.log_dir)
    logger.set(['acc', 'loss', 'loss_class', 'loss_ae', 'loss_r1', 'loss_r2'])
    logger.n_iter = start_n_iter
    for epoch in range(opts.start_epoch, opts.epochs):
        model.train()
        logger.step()
        # 10 random validation images, re-drawn each epoch, used below for
        # the autoencoder input/reconstruction grid.
        valid_sample = torch.stack([ valid_loader.dataset[i][0] for i in random.sample(range(len(valid_loader.dataset)), 10) ]).to(device)
        for batch_idx, (data, target) in enumerate(train_loader):
            # run_iter returns accuracy plus the total loss and its four
            # components (classification, autoencoder, and two regularizers).
            acc, loss, class_error, ae_error, error_1, error_2 = run_iter( opts, data, target, model, criterion, device)
            # optimizer step
            optimizer.zero_grad()
            loss.backward()
            # Gradient clipping keeps the prototype updates stable.
            nn.utils.clip_grad_norm_(model.parameters(), opts.max_norm)
            optimizer.step()
            logger.update(acc, loss, class_error, ae_error, error_1, error_2)
        val_loss, val_acc, val_class_error, val_ae_error, val_error_1, val_error_2, time_taken = evaluate( opts, model, valid_loader, criterion, device)
        # log the validation losses
        logger.log_valid(time_taken, val_acc, val_loss, val_class_error, val_ae_error, val_error_1, val_error_2)
        print('')
        # Save the model to disk
        # Best checkpoint: only when validation accuracy ties or improves.
        if val_acc >= best_val_acc:
            best_val_acc = val_acc
            save_state = { 'epoch': epoch, 'state_dict': model.state_dict(), 'optimizer': optimizer.state_dict(), 'n_iter': logger.n_iter, 'opts': opts, 'val_acc': val_acc, 'best_val_acc': best_val_acc }
            model_path = os.path.join(opts.save_path, 'model_best.net')
            torch.save(save_state, model_path)
            prototypes = model.save_prototypes(opts.save_path, 'prototypes_best.png')
            x = torchvision.utils.make_grid(prototypes, nrow=10, pad_value=1.0)
            logger.writer.add_image('Prototypes (best)', x, epoch)
        # Latest checkpoint: written unconditionally every epoch so training
        # can resume from the most recent state.
        save_state = { 'epoch': epoch, 'state_dict': model.state_dict(), 'optimizer': optimizer.state_dict(), 'n_iter': logger.n_iter, 'opts': opts, 'val_acc': val_acc, 'best_val_acc': best_val_acc }
        model_path = os.path.join(opts.save_path, 'model_latest.net')
        torch.save(save_state, model_path)
        prototypes = model.save_prototypes(opts.save_path, 'prototypes_latest.png')
        x = torchvision.utils.make_grid(prototypes, nrow=10, pad_value=1.0)
        logger.writer.add_image('Prototypes (latest)', x, epoch)
        # Side-by-side input/reconstruction grid for the sampled images.
        ae_samples = model.get_decoded_pairs_grid(valid_sample)
        logger.writer.add_image('AE_samples_latest', ae_samples, epoch)
type=int, help='how long to wait before shutting down on error') parser.add_argument('--short-epoch', action='store_true', help='make epochs short (for debugging)') return parser cudnn.benchmark = True args = get_parser().parse_args() # Only want master rank logging to tensorboard is_master = (not args.distributed) or (dist_utils.env_rank() == 0) is_rank0 = args.local_rank == 0 tb = TensorboardLogger(args.logdir, is_master=is_master) log = FileLogger(args.logdir, is_master=is_master, is_rank0=is_rank0) def main(): os.system('shutdown -c') # cancel previous shutdown command log.console(args) tb.log('sizes/world', dist_utils.env_world_size()) # need to index validation directory before we start counting the time dataloader.sort_ar(args.data + '/validation') if args.distributed: log.console('Distributed initializing process group') torch.cuda.set_device(args.local_rank) dist.init_process_group(backend=args.dist_backend,
""" Single Agent states = (1, 33) np.array actions = (1, 4) np.array rewards = [] list with length 1 dones = [] list with length 1 """ """ Multi Agents states = (20, 33) np.array actions = (20, 4) np.array rewards = [] list with length 20 dones = [] list with length 20 """ log = TensorboardLogger('./p2_log') def act(): action_size = 4 actions = np.random.randn(20, action_size) # select an action (for each agent) actions = np.clip(actions, -1, 1) # all actions between -1 and 1 return actions def env_step(env, actions, brain_name): """ Return next_states, rewards, dones """ env_info = env.step(actions)[
hdlr = logging.FileHandler(logfile) hdlr.setFormatter(formatter) logger.addHandler(hdlr) logger.info('Configurations: %s', args) #Wandb and tensorboard logging is_master = (os.environ.get('RANK', '0') == '0') if args.projname != 'test': #initialize WANDB if not is_master: os.environ['WANDB_MODE'] = 'dryrun' # all wandb.log are no-op logger.info("local-only wandb logging for run " + args.name) group_name = args.name run_name = args.name + '-' + os.environ.get("RANK", "0") wandb.init(project=args.projname, group=group_name, name=run_name) logger.info("initializing wandb logging to group " + args.name + " name ") tb = TensorboardLogger(relative_path, is_master=is_master) #log = FileLogger(args.logdir, is_master=is_master, is_rank0=is_master) train_with_single(args.dnn, args.dataset, args.data_dir, 1, args.lr, args.batch_size, args.nsteps_update, args.max_epochs, args.num_steps, tb=tb)
tb_runs = './runs/%s' % logdir writer = None #SummaryWriter(tb_runs) logfile = os.path.join(relative_path, settings.hostname + '-' + str(rank) + '.log') hdlr = logging.FileHandler(logfile) hdlr.setFormatter(formatter) logger.addHandler(hdlr) logger.info('Configurations: %s', args) # Wandb and tensorboard logging # initialize WANDB if rank != 0: os.environ['WANDB_MODE'] = 'dryrun' # all wandb.log are no-op logger.info("local-only wandb logging for run " + args.name) tb = TensorboardLogger(relative_path, is_master=(rank == 0)) # log = FileLogger(args.logdir, is_master=is_master, is_rank0=is_master) # Ahmed - Scale learning rate with respect to compression ratio lr = args.lr if settings.SCALE_LR and args.density < 1: if args.optimizer == 'sgd': lr = args.lr * abs(math.log(args.density, 10)) else: lr = args.lr / abs(math.log(args.density)) # Ahmed - Update it to add configs if args.wandbkey != 'none': os.environ["WANDB_API_KEY"] = args.wandbkey if args.tags is None or args.tags == 'notags':
help= "name of the current run, used for machine naming and tensorboard visualization" ) parser.add_argument('--short-epoch', action='store_true', help='make epochs short (for debugging)') return parser cudnn.benchmark = True args = get_parser().parse_args() # Only want master rank logging to tensorboard is_master = (not args.distributed) or (dist_utils.env_rank() == 0) is_rank0 = args.local_rank == 0 tb = TensorboardLogger(args.logdir, is_master=is_master, name=args.name) log = FileLogger(args.logdir, is_master=is_master, is_rank0=is_rank0) def main(): os.system('shutdown -c') # cancel previous shutdown command log.console(args) tb.log('sizes/world', dist_utils.env_world_size()) # need to index validation directory before we start counting the time dataloader.sort_ar(args.data + '/validation') if args.distributed: log.console('Distributed initializing process group') torch.cuda.set_device(args.local_rank) dist.init_process_group(backend=args.dist_backend,
""" Single Agent states = (1, 33) np.array actions = (1, 4) np.array rewards = [] list with length 1 dones = [] list with length 1 """ """ Multi Agents states = (20, 33) np.array actions = (20, 4) np.array rewards = [] list with length 20 dones = [] list with length 20 """ log = TensorboardLogger('./p2_log_test') def act(): action_size = 4 actions = np.random.randn(20, action_size) # select an action (for each agent) actions = np.clip(actions, -1, 1) # all actions between -1 and 1 return actions def env_step(env, actions, brain_name): """ Return next_states, rewards, dones """ env_info = env.step(actions)[brain_name] # send all actions to tne environment next_states = env_info.vector_observations # get next state (for each agent) rewards = env_info.rewards # get reward (for each agent) dones = env_info.local_done # see if episode finished