Code example #1
    def loop(self):
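        """Epsilon-greedy training loop: act in the environment, store
        transitions in replay memory, update the policy after
        start_learning steps, and log rewards and losses to TensorBoard
        while saving the model whenever an episode sets a new best reward."""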
        state = self.env.reset()
        episode_reward = 0
        best_episode_reward = None
        all_rewards = []
        w = tensorboard.SummaryWriter()

        for step in range(self.max_steps):
            epsilon = self._exploration(step)

            if np.random.random() > epsilon:
                action = self.model.get_action(state)
            else:
                action = np.random.randint(0,
                                           self.model.action_bins,
                                           size=self.model.action_space)

            next_state, reward, done, infos = self.env.step(action)
            episode_reward += reward

            if done:
                next_state = self.env.reset()
                all_rewards.append(episode_reward)
                print("Reward on Episode {}: {}".format(
                    len(all_rewards), episode_reward))
                w.add_scalar("reward/episode_reward",
                             episode_reward,
                             global_step=len(all_rewards))
                if best_episode_reward is None or episode_reward > best_episode_reward:
                    best_episode_reward = episode_reward
                    save_best(self.model, all_rewards, self.env.name,
                              self.output_dir)
                episode_reward = 0

            self.memory.push(
                (state.reshape(-1).numpy().tolist(), action, reward,
                 next_state.reshape(-1).numpy().tolist(), 0. if done else 1.))
            state = next_state

            if step > self.start_learning:
                loss = self.model.update_policy(
                    self.memory.sample(self.batch_size))
                w.add_scalar("loss/loss", loss, global_step=step)

            if step % self.save_update_freq == 0:
                save_checkpoint(self.model, all_rewards, self.env.name,
                                self.output_dir)

            if len(all_rewards) == self.max_episodes:
                save_checkpoint(self.model, all_rewards, self.env.name,
                                self.output_dir)
                break

        w.close()
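Each project above defines its own save_best (and save_checkpoint) helper, so the signatures differ between examples. Purely as an illustrative sketch, a helper compatible with the call in example #1, save_best(model, all_rewards, env_name, output_dir), might persist the weights and the reward history as shown below, assuming the agent is (or wraps) a torch.nn.Module; none of the names here come from the projects shown on this page.

import json
import os

import torch


def save_best(model, all_rewards, env_name, output_dir):
    """Hypothetical sketch: save the best-scoring weights and the reward history."""
    os.makedirs(output_dir, exist_ok=True)
    # Assumption: the agent exposes a torch.nn.Module-style state_dict().
    torch.save(model.state_dict(),
               os.path.join(output_dir, "{}_best.pt".format(env_name)))
    with open(os.path.join(output_dir, "{}_rewards.json".format(env_name)), "w") as f:
        json.dump(all_rewards, f)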
Code example #2
File: train.py Project: sergeyshilin/xray-lightning
def train(cfg: DictConfig) -> None:
    """
    Run model training.

    Parameters
    ----------
    cfg : DictConfig
        Project configuration object
    """
    model = load_obj(cfg.model.backbone.class_name)
    model = model(**cfg.model.backbone.params)

    # get number of input features for the classifier
    in_features = model.roi_heads.box_predictor.cls_score.in_features

    head = load_obj(cfg.model.head.class_name)

    # replace the pre-trained head with a new one
    model.roi_heads.box_predictor = head(in_features,
                                         cfg.model.head.params.num_classes)

    set_seed(cfg.training.seed)
    hparams = flatten_omegaconf(cfg)
    xray_detection = XrayDetection(hparams=hparams, cfg=cfg, model=model)

    callbacks = xray_detection.get_callbacks()
    loggers = xray_detection.get_loggers()

    trainer = pl.Trainer(
        logger=loggers,
        early_stop_callback=callbacks["early_stopping"],
        checkpoint_callback=callbacks["model_checkpoint"],
        **cfg.trainer,
    )
    trainer.fit(xray_detection)

    # Load the best checkpoint
    get_logger().info("Saving model from the best checkpoint...")
    checkpoints = [
        ckpt for ckpt in os.listdir("./")
        if ckpt.endswith(".ckpt") and ckpt != "last.ckpt"
    ]
    best_checkpoint_path = checkpoints[0]

    model = XrayDetection.load_from_checkpoint(best_checkpoint_path,
                                               hparams=hparams,
                                               cfg=cfg,
                                               model=model)

    save_best(model, cfg)
Code example #3
File: trainer.py Project: bleheup/FallGroup20-21
    def loop(self):
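        """Self-play training loop for the Everglades environment: pick
        actions epsilon-greedily (or via noisy networks), periodically add
        copies of the agent to the opponent pool, and log win rate, loss,
        and checkpoints."""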
        #state = self.env.reset()
        state = self.env.reset(players=self.players,
                               config_dir=self.config_dir,
                               map_file=self.map_file,
                               unit_file=self.unit_file,
                               output_dir=self.env_output_dir,
                               pnames=self.pnames,
                               debug=self.debug)
        num_of_wins = 0
        episode_winrate = 0
        total_games_played = 0
        all_winrate = []
        highest_winrate = 0
        w = tensorboard.SummaryWriter()
        time = datetime.now().strftime('%Y%m%d_%H%M%S')
        path = './runs/{}/'.format(self.output_dir)
        os.makedirs(path, exist_ok=True)

        for step in range(self.max_steps):
            epsilon = self._exploration(step)
            action_idx = []
            action = {}
            for pid in self.players:
                if pid == self.player_num:
                    # If noisy network or non-random actions are chosen
                    if (self.exploration_method == "Noisy"
                            or np.random.random_sample() > epsilon):
                        # The action indexes are needed for updating, thus get_action does not suffice
                        action_idx = self.model.get_action_idx(state[pid])
                        action[pid] = np.zeros(
                            (self.env.num_actions_per_turn, 2))
                        for n in range(0, len(action_idx)):
                            action[pid][n][0] = self.action_table[
                                action_idx[n]][0]
                            action[pid][n][1] = self.action_table[
                                action_idx[n]][1]
                    # Performs a random action
                    else:
                        #print("not here")
                        action_idx = np.random.choice(len(self.action_table),
                                                      size=7)
                        action[pid] = np.zeros(
                            (self.env.num_actions_per_turn, 2))
                        for n in range(0, len(action_idx)):
                            action[pid][n][0] = self.action_table[
                                action_idx[n]][0]
                            action[pid][n][1] = self.action_table[
                                action_idx[n]][1]
                else:
                    action[pid] = self.players[pid].get_action(state[pid])

            next_state, reward, done, infos = self.env.step(action)

            if done:
                # Adds copies of self to list of opponents at certain episode counts
                self.episode_cnt += 1
                if self.episode_cnt % self.opp_save_freq == 0:
                    print('ADDING NEW OPPONENT')
                    self.add_opponent()

                self.choose_opponent()
                next_state = self.env.reset(players=self.players,
                                            config_dir=self.config_dir,
                                            map_file=self.map_file,
                                            unit_file=self.unit_file,
                                            output_dir=self.env_output_dir,
                                            pnames=self.pnames,
                                            debug=self.debug)
                if reward[self.player_num] == 1:
                    num_of_wins += 1
                total_games_played += 1
                print("Result on game {}: {}".format(len(all_winrate), reward))
                episode_winrate = (num_of_wins / total_games_played) * 100
                all_winrate.append(episode_winrate)
                with open(os.path.join(path, "rewards-{}.txt".format(time)),
                          'a') as fout:
                    fout.write("{}\n".format(episode_winrate))
                print("Current winrate: {}%".format(episode_winrate))
                w.add_scalar("winrate",
                             episode_winrate,
                             global_step=len(all_winrate))
                if episode_winrate > highest_winrate:
                    highest_winrate = episode_winrate
                    save_best(self.model, all_winrate, "Evergaldes",
                              self.output_dir)

            self.memory.store(state[self.player_num], action_idx,
                              reward[self.player_num],
                              next_state[self.player_num], done)
            state = next_state

            if step > self.start_learning:
                loss = self.model.update_policy(
                    self.memory.miniBatch(self.batch_size), self.memory)
                with open(os.path.join(path, "loss-{}.txt".format(time)),
                          'a') as fout:
                    fout.write("{}\n".format(loss))
                w.add_scalar("loss/loss", loss, global_step=step)

            if step % self.save_update_freq == 0:
                save_checkpoint(self.model, all_winrate, "Evergaldes",
                                self.output_dir)

            if len(all_winrate) == self.max_episodes:
                save_checkpoint(self.model, all_winrate, "Evergaldes",
                                self.output_dir)
                break

        w.close()
Code example #4
    def loop(self):
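        """Training loop against a pool of scripted Everglades opponents:
        epsilon-greedy (or noisy-net) action selection with legal-move
        masking for random actions, optional n-step replay, and win-rate
        tracking over the last 100 games."""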
        player_list = {
            'random_actions': 1, 
            'base_rushV1': 0,
            'Cycle_BRush_Turn25': 0, 
            'Cycle_BRush_Turn50': 0,
            'Cycle_Target_Node': 0,
            'cycle_targetedNode1': 0,
            'cycle_targetedNode11': 0,
            'cycle_targetedNode11P2': 0,
            'same_commands': 0,
            'SwarmAgent': 0
            }
        plist = []

        for p in list(player_list.keys()):
            for i in range(0, player_list[p]):
                plist.append(p)

        state = self.env.reset(
            players=self.players,
            config_dir=self.config_dir,
            map_file=self.map_file,
            unit_file=self.unit_file,
            output_dir=self.env_output_dir,
            pnames=self.pnames,
            debug=self.debug
        )
        num_of_wins = 0
        episode_winrate = 0
        total_games_played = 0
        all_winrate = []
        all_reward = []
        highest_winrate = 0
        w = tensorboard.SummaryWriter()
        time = datetime.now().strftime('%Y%m%d_%H%M%S')
        path = './runs/{}/'.format(self.output_dir)
        os.makedirs(path, exist_ok=True)
        
        total_turn_played = 0
        turn_played_by_network = 0
        
        for step in range(self.max_steps):
            epsilon = self._exploration(step)
            self.renderer.render(state)
            # print(epsilon)
            
            action_idx = []
            action = {}
            total_turn_played += 1
            for pid in self.players:
                if pid == self.player_num:
                    if self.exploration_method == "Noisy" or np.random.random_sample() > epsilon:
                        action[pid] = self.model.get_action(state[pid])
                        turn_played_by_network += 1
                        
                    else:
                        
                        legal_moves = self.player_helper.legal_moves(state[pid])
                        actions_final = self._get_random(state[pid])

                        for i in range(len(actions_final)):
                            compute_idx = actions_final[i][0]*11 + (actions_final[i][1] - 1)
                            compute_idx = compute_idx.astype(int)
                            if not legal_moves[compute_idx]:
                                actions_final[i] = [0,0]
                        
                        action[pid] = actions_final
                        
                else:
                    action[pid] = self.players[pid].get_action(state[pid])
            #print(action)
            next_state, reward, done, scores = self.env.step(action)
            
            other_player_id = 0
            if self.player_num == 0:
                other_player_id = 1
            if done:
                if len(all_reward) > 100:
                    all_reward.pop(0)
                all_reward.append(reward[self.player_num])
                for pid in self.players:
                    if pid != self.player_num:
                        #print(plist)
                        self.player_name = random.choice(plist)
                        self._changePlayer(self.player_name, pid)
                        print("Training with {}".format(self.player_name))
                
                if self.isNSteps:
                    self.model.finish_nstep(self.memory)
                next_state = self.env.reset(
                    players=self.players,
                    config_dir=self.config_dir,
                    map_file=self.map_file,
                    unit_file=self.unit_file,
                    output_dir=self.env_output_dir,
                    pnames=self.pnames,
                    debug=self.debug
                )
                print("Result on game {}: {}. Number of moves made by the network: {}/{}. Agents: {}".format(
                    len(all_winrate), reward, turn_played_by_network, total_turn_played, self.player_name))
                # if reward[self.player_num] == 1:
                #     reward[self.player_num] = scores[self.player_num] + 3001
                # else:
                #     reward[self.player_num] = scores[self.player_num] - scores[other_player_id] - 3001
                total_games_played += 1
                num_of_wins = self.get_num_wins(all_reward)
                episode_winrate = (num_of_wins/len(all_reward)) * 100
        
                all_winrate.append(episode_winrate)
                with open(os.path.join(path, "rewards-{}.txt".format(time)), 'a') as fout:
                    fout.write("Winrate last 100: {}. Number of moves made by the network: {}/{}. Agents: {}\n".format(episode_winrate, turn_played_by_network, total_turn_played, self.player_name))
                print("Current winrate last 100: {}%".format(episode_winrate))
                w.add_scalar("winrate",
                             episode_winrate, global_step=len(all_winrate))
                turn_played_by_network = 0
                total_turn_played = 0
                if episode_winrate > highest_winrate:
                    highest_winrate = episode_winrate
                    save_best(self.model, all_winrate,
                              "Evergaldes", self.output_dir)
            # else:
            #     reward[self.player_num] = scores[self.player_num] - scores[other_player_id]
            if self.isNSteps:
                self.model.append_to_replay(self.memory,
                    state[self.player_num],
                    action[self.player_num],
                    reward[self.player_num],
                    next_state[self.player_num],
                    done
                )
            else:
                self.memory.add(
                    state[self.player_num],
                    action[self.player_num],
                    reward[self.player_num],
                    next_state[self.player_num],
                    done
                )
            state = next_state

            if step > self.start_learning:
                loss = self.model.update_policy(
                    self.memory.miniBatch(self.batch_size), self.memory)
                with open(os.path.join(path, "loss-{}.txt".format(time)), 'a') as fout:
                    fout.write("{}\n".format(loss))
                w.add_scalar("loss/loss", loss, global_step=step)

            if step % self.save_update_freq == 0:
                save_checkpoint(self.model, all_winrate,
                                "Evergaldes", self.output_dir)

            if len(all_winrate) == self.max_episodes:
                save_checkpoint(self.model, all_winrate,
                                "Evergaldes", self.output_dir)
                break

        w.close()
Code example #5
def train_fn(args, run_epoch, eval_fn, create_model, create_dataloaders):
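  """Train one model (optionally a single fold of a k-fold run): set up a
  W&B run, build the dataloaders, loop over epochs with early stopping on
  the early-stop loss, then reload the best checkpoint and log its
  validation metrics."""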
  
  reset_wandb_env()
  
  # W&B
  if args.n_folds:
    job_type = os.path.basename(os.path.normpath(args.save_dir))
    run_name = job_type+f'-{args.fold_number}'
    args.save_dir = os.path.join(args.save_dir, f'{args.fold_number}')
    
    # run = wandb.init(name=run_name, config=args, project='pdf-clustering', tags=[args.model_type], group=args.experiment, job_type=job_type, settings=wandb.Settings(start_method="fork"))
    run = wandb.init(name=run_name, config=args, project='pdf-clustering', tags=[args.model_type], group=args.experiment, job_type=job_type) 
    
  else:
    
    run = wandb.init(name=args.save_dir[8:], config=args, group=args.experiment, project='pdf-clustering', tags=[args.model_type])
    
  # Create save_dir
  if not os.path.exists(args.save_dir):
    os.makedirs(args.save_dir)

  # Save args
  with open(os.path.join(args.save_dir, 'args.json'), 'w') as f:
    json.dump(vars(args), f, sort_keys=True, indent=2)

  # Logging
  logger = create_logger(args.save_dir)
  if args.n_folds:
    logger.info(f'fold: {args.fold_number+1}/{args.n_folds}')
  
  # gpu
  if torch.cuda.is_available():
    device = 'cuda'
  else:
    device = 'cpu'

  # Data
  train_dl, early_stop_dl, val_dl, val_dl_cluster, d_len = create_dataloaders(args)
  logger.info(f'train dataset: {d_len[0]} sets')
  logger.info(f'dataset for early stopping: {d_len[1]} sets')
  logger.info(f'validation dataset: {d_len[2]} sets')
  
  # Prepare early stop
  stopped = False
  best_epoch = 0
  best_metrics_log = {}
  best_loss = torch.Tensor([float('Inf')])
  
  model, criterion, optimizer = create_model(args)
  model = model.to(device)
  
  run.watch(model)

  # Print args
  logger.info('using args: \n' + json.dumps(vars(args), sort_keys=True, indent=2))

  # Measure time to train model (count until best metrics achieved)
  tick = time.time()
  # epochs
  for t in range(args.n_epochs):

    # Run epoch
    metrics_log = run_epoch(t, args, model, criterion, optimizer, train_dl, early_stop_dl, val_dl, val_dl_cluster, eval_fn, device)
        
    # Print log
    if ((t+1)%args.print_freq == 0) or (t==0):
      log_message = create_log_message(metrics_log, t)
      logger.info(log_message)

    # W&B
    run.log(metrics_log, step=t)
      
    # Save best model
    if metrics_log['early_stop_loss'] < best_loss:
      best_loss, best_epoch = metrics_log['early_stop_loss'], t
      
      # Best
      best_metrics_log = metrics_log
      best_metrics_log['time_to_best_metrics'] = time.time() - tick
      save_best(args, t, model, best_metrics_log)

    # Check early stop
    if t >= best_epoch + args.early_stop:
      logger.info('EARLY STOP')
      break
      
  # End of training -> compute and log best validation metrics
  logger.info(f"Training ended: loading best model and computing it's metrics.")

  checkpoint = torch.load(os.path.join(args.save_dir, 'checkpoint.pt.tar'))
  model.load_state_dict(checkpoint['model'])

  time_to_best_metrics = best_metrics_log['time_to_best_metrics']
  tick_eval = time.time()
  best_metrics_log = eval_fn(model, criterion, val_dl_cluster, args, device, True)
  time_to_eval = time.time()-tick_eval
                      
  # Log best validation metrics
  best_metrics_log_edited = {}
  for entry in best_metrics_log:
    best_metrics_log_edited['best_'+entry] = best_metrics_log[entry]

  best_metrics_log['time_to_best_metrics'] = time_to_best_metrics
  best_metrics_log['time_to_eval'] = time_to_eval
  best_metrics_log['time'] = time.time() - tick
  
  logger.info(f'Metrics for best early stop loss:\n {best_metrics_log}')
  wandb.log(best_metrics_log_edited, step=t+1)
  wandb.log(best_metrics_log, step=t+1)
  
  # Only save model if args.save_model is set to True (to save memory space on RCI)
  if args.save_model:
    logger.info('Training finished successfully. Best model is saved at {}'.format(
        os.path.join(args.save_dir, 'checkpoint.pt.tar')))
    
    # Save metrics as well
    checkpoint = torch.load(os.path.join(args.save_dir, 'checkpoint.pt.tar'))
    checkpoint['best_metrics_log'] = best_metrics_log
    
    torch.save(checkpoint, os.path.join(args.save_dir, 'checkpoint.pt.tar'))
    logger.info('Final metrics save done.')
    
  else:
    logger.info('Training finished successfully.')
    
    # Delete model to save space
    checkpoint = torch.load(os.path.join(args.save_dir, 'checkpoint.pt.tar'))
    checkpoint['model'] = None
    
    # Save only metrics
    checkpoint['best_metrics_log'] = best_metrics_log
    torch.save(checkpoint, os.path.join(args.save_dir, 'checkpoint.pt.tar'))
    logger.info('Final metrics save done.')

  run.join()
  logger.handlers = []
Code example #6
def train(args):
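    """Train a Conv2dRNN sign-language classifier on WLASL with gradient
    accumulation, top-k accuracy validation, ReduceLROnPlateau scheduling,
    early stopping, and W&B logging."""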
    # Init wandb
    run = wandb.init(name=args.save_dir[len('../runs/'):],
                     config=args,
                     project='sign-language-recognition')

    # Create directory for model checkpoints and log
    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)

    # Save args
    with open(os.path.join(args.save_dir, 'args.json'), 'w') as f:
        json.dump(vars(args), f, sort_keys=True, indent=2)

    # Logger
    logger = create_logger(args.save_dir)

    # Set gpu
    if torch.cuda.is_available():
        i = get_free_gpu()
        device = get_device(gpu=i)
    else:
        device = 'cpu'
    logger.info('using device: {}'.format(device))

    # Prepare early stop
    stopped = False
    best_epoch = 0
    best_loss = torch.Tensor([float('Inf')])

    # Data

    if args.freeze_vgg:
        real_batch_size = 3
    else:
        real_batch_size = 2  # can't fit more into gpu memory

    json_file = os.path.join(args.data_path, 'WLASL_v0.3.json')
    videos_folder = os.path.join(args.data_path, 'videos')
    keypoints_folder = os.path.join(args.data_path, 'keypoints')
    train_transforms = transforms.Compose([videotransforms.RandomCrop(224)])
    val_transforms = train_transforms

    # Debug data
    if args.debug_dataset:
        train_dataset = WLASL(json_file=json_file,
                              videos_folder=videos_folder,
                              keypoints_folder=keypoints_folder,
                              transforms=train_transforms,
                              split='train',
                              subset=args.subset)
        train_dl = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=real_batch_size,
                                               sampler=DebugSampler(
                                                   args.debug_dataset,
                                                   len(train_dataset)))
        val_dl = train_dl
    else:
        train_dataset = WLASL(json_file=json_file,
                              videos_folder=videos_folder,
                              keypoints_folder=keypoints_folder,
                              transforms=train_transforms,
                              split='train',
                              subset=args.subset)
        train_dl = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=real_batch_size,
                                               shuffle=True)

        val_dataset = WLASL(json_file=json_file,
                            videos_folder=videos_folder,
                            keypoints_folder=keypoints_folder,
                            transforms=val_transforms,
                            split='val',
                            subset=args.subset)
        val_dl = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=real_batch_size,
                                             shuffle=True)
    logger.info('data loaded')

    # Model, loss, optimizer
    m = Conv2dRNN(args).to(device)
    optimizer = torch.optim.Adam(m.parameters(), lr=args.lr)
    criterion = nn.CrossEntropyLoss()

    # Resume train
    start_epoch = 0
    if args.resume_train:
        checkpoint = torch.load(os.path.join(args.save_dir,
                                             'checkpoint.pt.tar'),
                                map_location=torch.device('cpu'))
        best_epoch = checkpoint['epoch']
        m.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        m = m.to(device)
        best_loss = checkpoint['best_val_loss']
        start_epoch = best_epoch + 1

        # Change learning rate
        for g in optimizer.param_groups:
            g['lr'] = args.lr

        logger.info(
            'Resuming training from epoch {} with best loss {:.4f}'.format(
                start_epoch, best_loss))

    # learning rate scheduler
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,
        factor=args.lr_schedule_factor,
        patience=args.lr_schedule_patience,
        threshold=args.lr_schedule_threshold)

    # Watch model with wandb
    run.watch(m, log='all', log_freq=5)

    # Print args
    logger.info('using args: \n' +
                json.dumps(vars(args), sort_keys=True, indent=2))

    # Train loop
    for t in range(args.n_epochs):
        t += start_epoch
        # Train
        losses = AverageMeter()
        batch_time = AverageMeter()
        m.train()

        start_t = time.time()
        for i, batch in enumerate(train_dl):

            # Run the forward pass multiple times and accumulate gradient (to be able to use large batch size)
            X = batch['X'].to(device)
            label = batch['label'].to(device)

            # [per frame logits, mean of all frames logits]
            logits = m(X)

            # Create label for each logit
            label = torch.cat([l.repeat(logits.shape[1], 1) for l in label],
                              dim=0)

            # Squeeze time sequence and batch into one dimension
            logits = logits.reshape(logits.shape[0] * logits.shape[1],
                                    logits.shape[2])

            loss = criterion(logits, label.squeeze())
            loss.backward()
            losses.update(loss.item())

            if (i % (args.batch_size // real_batch_size)) == 0:
                # Optimize with accumulated gradient
                optimizer.step()
                optimizer.zero_grad()

                batch_time.update(time.time() - start_t)
                start_t = time.time()

        train_loss = losses.avg

        # Validate
        with torch.no_grad():
            top1 = AverageMeter()
            top5 = AverageMeter()
            top10 = AverageMeter()
            losses = AverageMeter()

            m.eval()
            for batch in val_dl:
                X = batch['X'].to(device)
                label = batch['label'].to(device)

                # [per frame logits, mean of all frames logits]
                logits = m(X)

                # Create label for each logit
                label = torch.cat(
                    [l.repeat(logits.shape[1], 1) for l in label], dim=0)

                # Squeeze time sequence and batch into one dimension
                logits = logits.reshape(logits.shape[0] * logits.shape[1],
                                        logits.shape[2])

                losses.update(criterion(logits, label.squeeze()).item())

                # Update metrics
                acc1, acc5, acc10 = topk_accuracy(logits,
                                                  label,
                                                  topk=(1, 5, 10))
                top1.update(acc1.item())
                top5.update(acc5.item())
                top10.update(acc10.item())

            val_loss = losses.avg

            # Save best model
            if val_loss < best_loss:
                best_loss, best_epoch = val_loss, t
                save_best(args, t, m, optimizer, best_loss)

            # Check early stop
            if t >= best_epoch + args.early_stop:
                logger.info('EARLY STOP')
                break

        # Log info
        logger.info(
            'epoch: {} train loss: {:.4f} val loss: {:.4f} top1acc {:.4f} top5acc {:.4f} top10acc {:.4f} lr: {:.2e} time per batch {:.1f} s'
            .format(t + 1, train_loss, val_loss, top1.avg, top5.avg, top10.avg,
                    optimizer.param_groups[0]['lr'], batch_time.avg))

        # Wandb log
        run.log({
            'train_loss': train_loss,
            'val_loss': val_loss,
            'top1_acc': top1.avg,
            'top5_acc': top5.avg,
            'top10_acc': top10.avg,
            'lr': optimizer.param_groups[0]['lr']
        })

        # Scheduler step
        if args.use_lr_scheduler:
            scheduler.step(val_loss)