Example #1
    def __init__(self, test=False):
        # device
        if torch.cuda.is_available():
            self.device = torch.device('cuda')
        else:
            self.device = torch.device('cpu')

        self.model = MLP(state_dim=4, action_num=2, hidden_dim=256).to(self.device)
        if test:
            self.load('./pg_best.cpt')
        # discounted reward
        self.gamma = 0.99
        # optimizer
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=3e-3)
        # saved rewards and actions
        self.memory = Memory()
        self.tensorboard = TensorboardLogger('./')
Example #2
class Agent():
    def __init__(self, test=False):
        # device
        if torch.cuda.is_available():
            self.device = torch.device('cuda')
        else:
            self.device = torch.device('cpu')

        self.model = MLP(state_dim=4, action_num=2, hidden_dim=256).to(self.device)
        if test:
            self.load('./pg_best.cpt')
        # discounted reward
        self.gamma = 0.99
        # optimizer
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=3e-3)
        # saved rewards and actions
        self.memory = Memory()
        self.tensorboard = TensorboardLogger('./')
    def save(self, save_path):
        print('save model to', save_path)
        torch.save(self.model.state_dict(), save_path)
    def load(self, load_path):
        print('load model from', load_path)
        self.model.load_state_dict(torch.load(load_path))
    def act(self, x, test=False):
        if not test:
            # boring type casting
            x = torch.from_numpy(x).unsqueeze(0).float().to(self.device)
            # stochastic sample
            action_prob = self.model(x)
            dist = torch.distributions.Categorical(action_prob)
            action = dist.sample()
            # memory log_prob
            self.memory.logprobs.append(dist.log_prob(action))
            return action.item()
        else:
            self.model.eval()
            x = torch.from_numpy(x).unsqueeze(0).float().to(self.device)
            with torch.no_grad():
                action_prob = self.model(x)
                # a = np.argmax(action_prob.cpu().numpy())
                dist = torch.distributions.Categorical(action_prob)
                action = dist.sample()
                return action.item()
    def collect_data(self, state, action, reward):
        self.memory.actions.append(action)
        self.memory.rewards.append(torch.tensor(reward))
        self.memory.states.append(state)
    def clear_data(self):
        self.memory.clear_memory()

    def update(self):
        R = 0
        advantage_function = []        
        for t in reversed(range(0, len(self.memory.rewards))):
            R = R * self.gamma + self.memory.rewards[t]
            advantage_function.insert(0, R)
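            # R now holds the discounted return G_t = r_t + gamma * G_{t+1} for step t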

        # turn rewards to pytorch tensor and standardize
        advantage_function = torch.Tensor(advantage_function).to(self.device)
        advantage_function = (advantage_function - advantage_function.mean()) / (advantage_function.std() + np.finfo(np.float32).eps)
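        # the float32 eps keeps the division safe when all returns coincide (std == 0)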

        policy_loss = []
        for log_prob, reward in zip(self.memory.logprobs, advantage_function):
            policy_loss.append(-log_prob * reward)
        # Update network weights
        self.optimizer.zero_grad()
        loss = torch.cat(policy_loss).sum()
        loss.backward()
        self.optimizer.step() 
        # boring log
        self.tensorboard.scalar_summary("loss", loss.item())
        self.tensorboard.update()
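The example above defines the agent but not the loop that drives it. Below is a minimal sketch of how its methods could be wired together; the CartPole-v1 environment (which matches state_dim=4 / action_num=2), the classic gym reset/step signatures, and the episode count are assumptions, not part of the original snippet.

import gym

env = gym.make('CartPole-v1')
agent = Agent()
for episode in range(1000):
    state = env.reset()  # classic gym API: reset() returns the observation
    done = False
    while not done:
        action = agent.act(state)  # stochastic action, log-prob stored in agent.memory
        next_state, reward, done, _ = env.step(action)  # classic gym API: 4-tuple return
        agent.collect_data(state, action, reward)
        state = next_state
    agent.update()      # one REINFORCE update per episode
    agent.clear_data()  # memory must be emptied before the next episode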
Example #3
def train(opts):

    device = torch.device("cuda" if use_cuda else "cpu")

    if opts.arch == 'small':
        channels = [32, 32, 32, 10]
    elif opts.arch == 'large':
        channels = [256, 128, 64, 32]
    else:
        raise NotImplementedError('Unknown model architecture')

    if opts.mode == 'train_mnist':
        train_loader, valid_loader = get_mnist_loaders(opts.data_dir,
                                                       opts.bsize,
                                                       opts.nworkers,
                                                       opts.sigma, opts.alpha)
        model = CAE(1, 10, 28, opts.n_prototypes, opts.decoder_arch, channels)
    elif opts.mode == 'train_cifar':
        train_loader, valid_loader = get_cifar_loaders(opts.data_dir,
                                                       opts.bsize,
                                                       opts.nworkers,
                                                       opts.sigma, opts.alpha)
        model = CAE(3, 10, 32, opts.n_prototypes, opts.decoder_arch, channels)
    elif opts.mode == 'train_fmnist':
        train_loader, valid_loader = get_fmnist_loaders(
            opts.data_dir, opts.bsize, opts.nworkers, opts.sigma, opts.alpha)
        model = CAE(1, 10, 28, opts.n_prototypes, opts.decoder_arch, channels)
    else:
        raise NotImplementedError('Unknown train mode')

    if opts.optim == 'adam':
        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=opts.lr,
                                     weight_decay=opts.wd)
    else:
        raise NotImplementedError("Unknown optim type")
    criterion = nn.CrossEntropyLoss()

    start_n_iter = 0
    # for choosing the best model
    best_val_acc = 0.0

    model_path = os.path.join(opts.save_path, 'model_latest.net')
    if opts.resume and os.path.exists(model_path):
        # restoring training from save_state
        print('====> Resuming training from previous checkpoint')
        save_state = torch.load(model_path, map_location='cpu')
        model.load_state_dict(save_state['state_dict'])
        start_n_iter = save_state['n_iter']
        best_val_acc = save_state['best_val_acc']
        opts = save_state['opts']
        opts.start_epoch = save_state['epoch'] + 1

    model = model.to(device)

    # for logging
    logger = TensorboardLogger(opts.start_epoch, opts.log_iter, opts.log_dir)
    logger.set(['acc', 'loss', 'loss_class', 'loss_ae', 'loss_r1', 'loss_r2'])
    logger.n_iter = start_n_iter

    for epoch in range(opts.start_epoch, opts.epochs):
        model.train()
        logger.step()
        valid_sample = torch.stack([
            valid_loader.dataset[i][0]
            for i in random.sample(range(len(valid_loader.dataset)), 10)
        ]).to(device)

        for batch_idx, (data, target) in enumerate(train_loader):
            acc, loss, class_error, ae_error, error_1, error_2 = run_iter(
                opts, data, target, model, criterion, device)

            # optimizer step
            optimizer.zero_grad()
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), opts.max_norm)
            optimizer.step()

            logger.update(acc, loss, class_error, ae_error, error_1, error_2)

        val_loss, val_acc, val_class_error, val_ae_error, val_error_1, val_error_2, time_taken = evaluate(
            opts, model, valid_loader, criterion, device)
        # log the validation losses
        logger.log_valid(time_taken, val_acc, val_loss, val_class_error,
                         val_ae_error, val_error_1, val_error_2)
        print('')

        # Save the model to disk
        if val_acc >= best_val_acc:
            best_val_acc = val_acc
            save_state = {
                'epoch': epoch,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'n_iter': logger.n_iter,
                'opts': opts,
                'val_acc': val_acc,
                'best_val_acc': best_val_acc
            }
            model_path = os.path.join(opts.save_path, 'model_best.net')
            torch.save(save_state, model_path)
            prototypes = model.save_prototypes(opts.save_path,
                                               'prototypes_best.png')
            x = torchvision.utils.make_grid(prototypes, nrow=10, pad_value=1.0)
            logger.writer.add_image('Prototypes (best)', x, epoch)

        save_state = {
            'epoch': epoch,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict(),
            'n_iter': logger.n_iter,
            'opts': opts,
            'val_acc': val_acc,
            'best_val_acc': best_val_acc
        }
        model_path = os.path.join(opts.save_path, 'model_latest.net')
        torch.save(save_state, model_path)
        prototypes = model.save_prototypes(opts.save_path,
                                           'prototypes_latest.png')
        x = torchvision.utils.make_grid(prototypes, nrow=10, pad_value=1.0)
        logger.writer.add_image('Prototypes (latest)', x, epoch)
        ae_samples = model.get_decoded_pairs_grid(valid_sample)
        logger.writer.add_image('AE_samples_latest', ae_samples, epoch)
Example #4
                        type=int,
                        help='how long to wait before shutting down on error')

    parser.add_argument('--short-epoch',
                        action='store_true',
                        help='make epochs short (for debugging)')
    return parser


cudnn.benchmark = True
args = get_parser().parse_args()

# Only want master rank logging to tensorboard
is_master = (not args.distributed) or (dist_utils.env_rank() == 0)
is_rank0 = args.local_rank == 0
tb = TensorboardLogger(args.logdir, is_master=is_master)
log = FileLogger(args.logdir, is_master=is_master, is_rank0=is_rank0)


def main():
    os.system('shutdown -c')  # cancel previous shutdown command
    log.console(args)
    tb.log('sizes/world', dist_utils.env_world_size())

    # need to index validation directory before we start counting the time
    dataloader.sort_ar(args.data + '/validation')

    if args.distributed:
        log.console('Distributed initializing process group')
        torch.cuda.set_device(args.local_rank)
        dist.init_process_group(backend=args.dist_backend,
Example #5
"""
Single Agent
states = (1, 33) np.array
actions = (1, 4) np.array
rewards = [] list with length 1
dones = [] list with length 1
"""
"""
Multi Agents
states = (20, 33) np.array
actions = (20, 4) np.array
rewards = [] list with length 20
dones = [] list with length 20
"""

log = TensorboardLogger('./p2_log')


def act():
    action_size = 4
    actions = np.random.randn(20,
                              action_size)  # select an action (for each agent)
    actions = np.clip(actions, -1, 1)  # all actions between -1 and 1
    return actions


def env_step(env, actions, brain_name):
    """
    Return next_states, rewards, dones
    """
    env_info = env.step(actions)[
Example #6
    hdlr = logging.FileHandler(logfile)
    hdlr.setFormatter(formatter)
    logger.addHandler(hdlr)
    logger.info('Configurations: %s', args)

    # Wandb and tensorboard logging
    is_master = (os.environ.get('RANK', '0') == '0')
    if args.projname != 'test':
        # initialize WANDB
        if not is_master:
            os.environ['WANDB_MODE'] = 'dryrun'  # all wandb.log are no-op
            logger.info("local-only wandb logging for run " + args.name)
        group_name = args.name
        run_name = args.name + '-' + os.environ.get("RANK", "0")
        wandb.init(project=args.projname, group=group_name, name=run_name)
        logger.info("initializing wandb logging to group " + args.name +
                    " name ")
    tb = TensorboardLogger(relative_path, is_master=is_master)
    #log = FileLogger(args.logdir, is_master=is_master, is_rank0=is_master)

    train_with_single(args.dnn,
                      args.dataset,
                      args.data_dir,
                      1,
                      args.lr,
                      args.batch_size,
                      args.nsteps_update,
                      args.max_epochs,
                      args.num_steps,
                      tb=tb)
Example #7
        tb_runs = './runs/%s' % logdir
        writer = None  #SummaryWriter(tb_runs)
    logfile = os.path.join(relative_path,
                           settings.hostname + '-' + str(rank) + '.log')
    hdlr = logging.FileHandler(logfile)
    hdlr.setFormatter(formatter)
    logger.addHandler(hdlr)
    logger.info('Configurations: %s', args)

    # Wandb and tensorboard logging
    # initialize WANDB
    if rank != 0:
        os.environ['WANDB_MODE'] = 'dryrun'  # all wandb.log are no-op
        logger.info("local-only wandb logging for run " + args.name)

    tb = TensorboardLogger(relative_path, is_master=(rank == 0))
    # log = FileLogger(args.logdir, is_master=is_master, is_rank0=is_master)

    # Ahmed - Scale learning rate with respect to compression ratio
    lr = args.lr

    if settings.SCALE_LR and args.density < 1:
        if args.optimizer == 'sgd':
            lr = args.lr * abs(math.log(args.density, 10))
        else:
            lr = args.lr / abs(math.log(args.density))
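    # worked example (added note, not in the original): with density = 0.01 the SGD
    # branch gives lr = args.lr * |log10(0.01)| = 2 * args.lr, while the other branch
    # gives lr = args.lr / |ln(0.01)| ~= args.lr / 4.6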

    # Ahmed - Update it to add configs
    if args.wandbkey != 'none':
        os.environ["WANDB_API_KEY"] = args.wandbkey
    if args.tags is None or args.tags == 'notags':
Example #8
        help=
        "name of the current run, used for machine naming and tensorboard visualization"
    )
    parser.add_argument('--short-epoch',
                        action='store_true',
                        help='make epochs short (for debugging)')
    return parser


cudnn.benchmark = True
args = get_parser().parse_args()

# Only want master rank logging to tensorboard
is_master = (not args.distributed) or (dist_utils.env_rank() == 0)
is_rank0 = args.local_rank == 0
tb = TensorboardLogger(args.logdir, is_master=is_master, name=args.name)
log = FileLogger(args.logdir, is_master=is_master, is_rank0=is_rank0)


def main():
    os.system('shutdown -c')  # cancel previous shutdown command
    log.console(args)
    tb.log('sizes/world', dist_utils.env_world_size())

    # need to index validation directory before we start counting the time
    dataloader.sort_ar(args.data + '/validation')

    if args.distributed:
        log.console('Distributed initializing process group')
        torch.cuda.set_device(args.local_rank)
        dist.init_process_group(backend=args.dist_backend,
Example #9
"""
Single Agent
states = (1, 33) np.array
actions = (1, 4) np.array
rewards = [] list with length 1
dones = [] list with length 1
"""
"""
Multi Agents
states = (20, 33) np.array
actions = (20, 4) np.array
rewards = [] list with length 20
dones = [] list with length 20
"""

log = TensorboardLogger('./p2_log_test')

def act():
    action_size = 4
    actions = np.random.randn(20, action_size)  # select an action (for each agent)
    actions = np.clip(actions, -1, 1)  # all actions between -1 and 1
    return actions

def env_step(env, actions, brain_name):
    """
    Return next_states, rewards, dones
    """
    env_info = env.step(actions)[brain_name]           # send all actions to the environment
    next_states = env_info.vector_observations         # get next state (for each agent)
    rewards = env_info.rewards                         # get reward (for each agent)
    dones = env_info.local_done                        # see if episode finished