def loop(self):
    state = self.env.reset()
    episode_reward = 0
    best_episode_reward = None
    all_rewards = []
    w = tensorboard.SummaryWriter()

    for step in range(self.max_steps):
        epsilon = self._exploration(step)

        # Epsilon-greedy action selection
        if np.random.random() > epsilon:
            action = self.model.get_action(state)
        else:
            action = np.random.randint(0, self.model.action_bins,
                                       size=self.model.action_space)

        next_state, reward, done, infos = self.env.step(action)
        episode_reward += reward

        if done:
            next_state = self.env.reset()
            all_rewards.append(episode_reward)
            print("Reward on Episode {}: {}".format(len(all_rewards), episode_reward))
            w.add_scalar("reward/episode_reward", episode_reward,
                         global_step=len(all_rewards))
            if best_episode_reward is None or episode_reward > best_episode_reward:
                best_episode_reward = episode_reward
                save_best(self.model, all_rewards, self.env.name, self.output_dir)
            episode_reward = 0

        # Store the transition; the last element is a non-terminal mask (0 if done, 1 otherwise)
        self.memory.push(
            (state.reshape(-1).numpy().tolist(), action, reward,
             next_state.reshape(-1).numpy().tolist(), 0. if done else 1.))
        state = next_state

        if step > self.start_learning:
            loss = self.model.update_policy(self.memory.sample(self.batch_size))
            w.add_scalar("loss/loss", loss, global_step=step)

        if step % self.save_update_freq == 0:
            save_checkpoint(self.model, all_rewards, self.env.name, self.output_dir)

        if len(all_rewards) == self.max_episodes:
            save_checkpoint(self.model, all_rewards, self.env.name, self.output_dir)
            break

    w.close()
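# A minimal sketch of the epsilon schedule that `self._exploration(step)` above is
# assumed to implement: exponential decay from a starting value toward a floor.
# The parameter names below (eps_start, eps_final, eps_decay) are illustrative and
# not taken from the original code; in the trainer they would live on `self`.
import math

def exploration_schedule(step, eps_start=1.0, eps_final=0.01, eps_decay=30000):
    # Decays from eps_start toward eps_final as `step` grows.
    return eps_final + (eps_start - eps_final) * math.exp(-step / eps_decay)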
def train(cfg: DictConfig) -> None:
    """
    Run model training.

    Parameters
    ----------
    cfg : DictConfig
        Project configuration object
    """
    model = load_obj(cfg.model.backbone.class_name)
    model = model(**cfg.model.backbone.params)

    # get number of input features for the classifier
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    head = load_obj(cfg.model.head.class_name)
    # replace the pre-trained head with a new one
    model.roi_heads.box_predictor = head(in_features, cfg.model.head.params.num_classes)

    set_seed(cfg.training.seed)
    hparams = flatten_omegaconf(cfg)

    xray_detection = XrayDetection(hparams=hparams, cfg=cfg, model=model)
    callbacks = xray_detection.get_callbacks()
    loggers = xray_detection.get_loggers()

    trainer = pl.Trainer(
        logger=loggers,
        early_stop_callback=callbacks["early_stopping"],
        checkpoint_callback=callbacks["model_checkpoint"],
        **cfg.trainer,
    )
    trainer.fit(xray_detection)

    # Load the best checkpoint
    get_logger().info("Saving model from the best checkpoint...")
    checkpoints = [
        ckpt for ckpt in os.listdir("./")
        if ckpt.endswith(".ckpt") and ckpt != "last.ckpt"
    ]
    best_checkpoint_path = checkpoints[0]
    model = XrayDetection.load_from_checkpoint(best_checkpoint_path,
                                               hparams=hparams, cfg=cfg, model=model)
    save_best(model, cfg)
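# `load_obj` above is assumed to resolve a dotted class path from the Hydra config
# (e.g. "torchvision.models.detection.fasterrcnn_resnet50_fpn") into a Python object.
# A minimal sketch of such a helper, under that assumption:
import importlib
from typing import Any

def load_obj(obj_path: str) -> Any:
    # Split "package.module.ClassName" into a module path and an attribute name,
    # import the module, and return the attribute.
    module_path, _, obj_name = obj_path.rpartition('.')
    module = importlib.import_module(module_path)
    return getattr(module, obj_name)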
def loop(self):
    #state = self.env.reset()
    state = self.env.reset(players=self.players,
                           config_dir=self.config_dir,
                           map_file=self.map_file,
                           unit_file=self.unit_file,
                           output_dir=self.env_output_dir,
                           pnames=self.pnames,
                           debug=self.debug)
    num_of_wins = 0
    episode_winrate = 0
    total_games_played = 0
    all_winrate = []
    highest_winrate = 0
    w = tensorboard.SummaryWriter()
    time = datetime.now().strftime('%Y%m%d_%H%M%S')
    path = './runs/{}/'.format(self.output_dir)
    os.makedirs(path, exist_ok=True)

    for step in range(self.max_steps):
        epsilon = self._exploration(step)
        action_idx = []
        action = {}
        for pid in self.players:
            if pid == self.player_num:
                # If noisy network or non-random actions are chosen
                if self.exploration_method == "Noisy" or np.random.random_sample() > epsilon:
                    # The action indexes are needed for updating, thus get_action does not suffice
                    action_idx = self.model.get_action_idx(state[pid])
                    action[pid] = np.zeros((self.env.num_actions_per_turn, 2))
                    for n in range(0, len(action_idx)):
                        action[pid][n][0] = self.action_table[action_idx[n]][0]
                        action[pid][n][1] = self.action_table[action_idx[n]][1]
                # Performs a random action
                else:
                    #print("not here")
                    action_idx = np.random.choice(len(self.action_table), size=7)
                    action[pid] = np.zeros((self.env.num_actions_per_turn, 2))
                    for n in range(0, len(action_idx)):
                        action[pid][n][0] = self.action_table[action_idx[n]][0]
                        action[pid][n][1] = self.action_table[action_idx[n]][1]
            else:
                action[pid] = self.players[pid].get_action(state[pid])

        next_state, reward, done, infos = self.env.step(action)

        if done:
            # Adds copies of self to list of opponents at certain episode counts
            self.episode_cnt += 1
            if self.episode_cnt % self.opp_save_freq == 0:
                print('ADDING NEW OPPONENT')
                self.add_opponent()
            self.choose_opponent()
            next_state = self.env.reset(players=self.players,
                                        config_dir=self.config_dir,
                                        map_file=self.map_file,
                                        unit_file=self.unit_file,
                                        output_dir=self.env_output_dir,
                                        pnames=self.pnames,
                                        debug=self.debug)
            if reward[self.player_num] == 1:
                num_of_wins += 1
            total_games_played += 1
            print("Result on game {}: {}".format(len(all_winrate), reward))
            episode_winrate = (num_of_wins / total_games_played) * 100
            all_winrate.append(episode_winrate)
            with open(os.path.join(path, "rewards-{}.txt".format(time)), 'a') as fout:
                fout.write("{}\n".format(episode_winrate))
            print("Current winrate: {}%".format(episode_winrate))
            w.add_scalar("winrate", episode_winrate, global_step=len(all_winrate))
            if episode_winrate > highest_winrate:
                highest_winrate = episode_winrate
                save_best(self.model, all_winrate, "Everglades", self.output_dir)

        self.memory.store(state[self.player_num], action_idx,
                          reward[self.player_num],
                          next_state[self.player_num], done)
        state = next_state

        if step > self.start_learning:
            loss = self.model.update_policy(self.memory.miniBatch(self.batch_size), self.memory)
            with open(os.path.join(path, "loss-{}.txt".format(time)), 'a') as fout:
                fout.write("{}\n".format(loss))
            w.add_scalar("loss/loss", loss, global_step=step)

        if step % self.save_update_freq == 0:
            save_checkpoint(self.model, all_winrate, "Everglades", self.output_dir)

        if len(all_winrate) == self.max_episodes:
            save_checkpoint(self.model, all_winrate, "Everglades", self.output_dir)
            break

    w.close()
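# The loops above and below index a flat action space through `self.action_table`,
# which maps an action index to a (group, node) pair; the companion loop below
# computes the inverse index as group * 11 + (node - 1), which suggests 11 target
# nodes per group. A sketch of how such a table could be built, assuming
# `num_groups` unit groups (the exact count is not given in the original code):
import numpy as np

def build_action_table(num_groups, num_nodes=11):
    # One row per (group, node) combination: column 0 is the unit group,
    # column 1 is the destination node (nodes are 1-indexed).
    return np.array([[g, n] for g in range(num_groups)
                     for n in range(1, num_nodes + 1)])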
def loop(self):
    player_list = {
        'random_actions': 1,
        'base_rushV1': 0,
        'Cycle_BRush_Turn25': 0,
        'Cycle_BRush_Turn50': 0,
        'Cycle_Target_Node': 0,
        'cycle_targetedNode1': 0,
        'cycle_targetedNode11': 0,
        'cycle_targetedNode11P2': 0,
        'same_commands': 0,
        'SwarmAgent': 0
    }
    plist = []
    for p in list(player_list.keys()):
        for i in range(0, player_list[p]):
            plist.append(p)

    state = self.env.reset(players=self.players,
                           config_dir=self.config_dir,
                           map_file=self.map_file,
                           unit_file=self.unit_file,
                           output_dir=self.env_output_dir,
                           pnames=self.pnames,
                           debug=self.debug)
    num_of_wins = 0
    episode_winrate = 0
    total_games_played = 0
    all_winrate = []
    all_reward = []
    highest_winrate = 0
    w = tensorboard.SummaryWriter()
    time = datetime.now().strftime('%Y%m%d_%H%M%S')
    path = './runs/{}/'.format(self.output_dir)
    os.makedirs(path, exist_ok=True)
    total_turn_played = 0
    turn_played_by_network = 0

    for step in range(self.max_steps):
        epsilon = self._exploration(step)
        self.renderer.render(state)
        # print(epsilon)
        action_idx = []
        action = {}
        total_turn_played += 1
        for pid in self.players:
            if pid == self.player_num:
                if self.exploration_method == "Noisy" or np.random.random_sample() > epsilon:
                    action[pid] = self.model.get_action(state[pid])
                    turn_played_by_network += 1
                else:
                    # Sample random actions, keeping only the legal ones
                    legal_moves = self.player_helper.legal_moves(state[pid])
                    actions_final = self._get_random(state[pid])
                    for i in range(len(actions_final)):
                        compute_idx = actions_final[i][0] * 11 + (actions_final[i][1] - 1)
                        compute_idx = compute_idx.astype(int)
                        if not legal_moves[compute_idx]:
                            actions_final[i] = [0, 0]
                    action[pid] = actions_final
            else:
                action[pid] = self.players[pid].get_action(state[pid])
        #print(action)

        next_state, reward, done, scores = self.env.step(action)
        other_player_id = 0
        if self.player_num == 0:
            other_player_id = 1

        if done:
            # Keep a sliding window of roughly the last 100 game results
            if len(all_reward) > 100:
                all_reward.pop(0)
            all_reward.append(reward[self.player_num])

            # Sample a new opponent from the pool for the next game
            for pid in self.players:
                if pid != self.player_num:
                    #print(plist)
                    self.player_name = random.choice(plist)
                    self._changePlayer(self.player_name, pid)
                    print("Training with {}".format(self.player_name))

            if self.isNSteps:
                self.model.finish_nstep(self.memory)

            next_state = self.env.reset(players=self.players,
                                        config_dir=self.config_dir,
                                        map_file=self.map_file,
                                        unit_file=self.unit_file,
                                        output_dir=self.env_output_dir,
                                        pnames=self.pnames,
                                        debug=self.debug)
            print("Result on game {}: {}. Number of moves made by the network: {}/{}. Agents: {}".format(
                len(all_winrate), reward, turn_played_by_network, total_turn_played, self.player_name))

            # if reward[self.player_num] == 1:
            #     reward[self.player_num] = scores[self.player_num] + 3001
            # else:
            #     reward[self.player_num] = scores[self.player_num] - scores[other_player_id] - 3001

            total_games_played += 1
            num_of_wins = self.get_num_wins(all_reward)
            episode_winrate = (num_of_wins / len(all_reward)) * 100
            all_winrate.append(episode_winrate)
            with open(os.path.join(path, "rewards-{}.txt".format(time)), 'a') as fout:
                fout.write("Winrate last 100: {}. Number of moves made by the network: {}/{}. Agents: {}\n".format(
                    episode_winrate, turn_played_by_network, total_turn_played, self.player_name))
            print("Current winrate last 100: {}%".format(episode_winrate))
            w.add_scalar("winrate", episode_winrate, global_step=len(all_winrate))
            turn_played_by_network = 0
            total_turn_played = 0
            if episode_winrate > highest_winrate:
                highest_winrate = episode_winrate
                save_best(self.model, all_winrate, "Everglades", self.output_dir)
        # else:
        #     reward[self.player_num] = scores[self.player_num] - scores[other_player_id]

        if self.isNSteps:
            self.model.append_to_replay(self.memory,
                                        state[self.player_num],
                                        action[self.player_num],
                                        reward[self.player_num],
                                        next_state[self.player_num],
                                        done)
        else:
            self.memory.add(state[self.player_num],
                            action[self.player_num],
                            reward[self.player_num],
                            next_state[self.player_num],
                            done)
        state = next_state

        if step > self.start_learning:
            loss = self.model.update_policy(self.memory.miniBatch(self.batch_size), self.memory)
            with open(os.path.join(path, "loss-{}.txt".format(time)), 'a') as fout:
                fout.write("{}\n".format(loss))
            w.add_scalar("loss/loss", loss, global_step=step)

        if step % self.save_update_freq == 0:
            save_checkpoint(self.model, all_winrate, "Everglades", self.output_dir)

        if len(all_winrate) == self.max_episodes:
            save_checkpoint(self.model, all_winrate, "Everglades", self.output_dir)
            break

    w.close()
def train_fn(args, run_epoch, eval_fn, create_model, create_dataloaders):
    reset_wandb_env()

    # W&B
    if args.n_folds:
        job_type = os.path.basename(os.path.normpath(args.save_dir))
        run_name = job_type + f'-{args.fold_number}'
        args.save_dir = os.path.join(args.save_dir, f'{args.fold_number}')
        # run = wandb.init(name=run_name, config=args, project='pdf-clustering',
        #                  tags=[args.model_type], group=args.experiment, job_type=job_type,
        #                  settings=wandb.Settings(start_method="fork"))
        run = wandb.init(name=run_name, config=args, project='pdf-clustering',
                         tags=[args.model_type], group=args.experiment, job_type=job_type)
    else:
        run = wandb.init(name=args.save_dir[8:], config=args, group=args.experiment,
                         project='pdf-clustering', tags=[args.model_type])

    # Create save_dir
    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)

    # Save args
    with open(os.path.join(args.save_dir, 'args.json'), 'w') as f:
        json.dump(vars(args), f, sort_keys=True, indent=2)

    # Logging
    logger = create_logger(args.save_dir)
    if args.n_folds:
        logger.info(f'fold: {args.fold_number + 1}/{args.n_folds}')

    # gpu
    if torch.cuda.is_available():
        device = 'cuda'
    else:
        device = 'cpu'

    # Data
    train_dl, early_stop_dl, val_dl, val_dl_cluster, d_len = create_dataloaders(args)
    logger.info(f'train dataset: {d_len[0]} sets')
    logger.info(f'dataset for early stopping: {d_len[1]} sets')
    logger.info(f'validation dataset: {d_len[2]} sets')

    # Prepare early stop
    stopped = False
    best_epoch = 0
    best_metrics_log = {}
    best_loss = torch.Tensor([float('Inf')])

    model, criterion, optimizer = create_model(args)
    model = model.to(device)
    run.watch(model)

    # Print args
    logger.info('using args: \n' + json.dumps(vars(args), sort_keys=True, indent=2))

    # Measure time to train model (count until best metrics achieved)
    tick = time.time()

    # epochs
    for t in range(args.n_epochs):
        # Run epoch
        metrics_log = run_epoch(t, args, model, criterion, optimizer, train_dl,
                                early_stop_dl, val_dl, val_dl_cluster, eval_fn, device)

        # Print log
        if ((t + 1) % args.print_freq == 0) or (t == 0):
            log_message = create_log_message(metrics_log, t)
            logger.info(log_message)

        # W&B
        run.log(metrics_log, step=t)

        # Save best model
        if metrics_log['early_stop_loss'] < best_loss:
            best_loss, best_epoch = metrics_log['early_stop_loss'], t
            # Best metrics seen so far
            best_metrics_log = metrics_log
            best_metrics_log['time_to_best_metrics'] = time.time() - tick
            save_best(args, t, model, best_metrics_log)

        # Check early stop
        if t >= best_epoch + args.early_stop:
            logger.info('EARLY STOP')
            break

    # End of training -> compute and log best validation metrics
    logger.info("Training ended: loading best model and computing its metrics.")
    checkpoint = torch.load(os.path.join(args.save_dir, 'checkpoint.pt.tar'))
    model.load_state_dict(checkpoint['model'])
    time_to_best_metrics = best_metrics_log['time_to_best_metrics']
    tick_eval = time.time()
    best_metrics_log = eval_fn(model, criterion, val_dl_cluster, args, device, True)
    time_to_eval = time.time() - tick_eval

    # Log best validation metrics
    best_metrics_log_edited = {}
    for entry in best_metrics_log:
        best_metrics_log_edited['best_' + entry] = best_metrics_log[entry]
    best_metrics_log['time_to_best_metrics'] = time_to_best_metrics
    best_metrics_log['time_to_eval'] = time_to_eval
    best_metrics_log['time'] = time.time() - tick
    logger.info(f'Metrics for best early stop loss:\n {best_metrics_log}')
    wandb.log(best_metrics_log_edited, step=t + 1)
    wandb.log(best_metrics_log, step=t + 1)

    # Only save model if args.save_model is set to True (to save memory space on RCI)
    if args.save_model:
        logger.info('Training finished successfully. Best model is saved at {}'.format(
            os.path.join(args.save_dir, 'checkpoint.pt.tar')))
        # Save metrics as well
        checkpoint = torch.load(os.path.join(args.save_dir, 'checkpoint.pt.tar'))
        checkpoint['best_metrics_log'] = best_metrics_log
        torch.save(checkpoint, os.path.join(args.save_dir, 'checkpoint.pt.tar'))
        logger.info('Final metrics save done.')
    else:
        logger.info('Training finished successfully.')
        # Delete model to save space
        checkpoint = torch.load(os.path.join(args.save_dir, 'checkpoint.pt.tar'))
        checkpoint['model'] = None
        # Save only metrics
        checkpoint['best_metrics_log'] = best_metrics_log
        torch.save(checkpoint, os.path.join(args.save_dir, 'checkpoint.pt.tar'))
        logger.info('Final metrics save done.')

    run.join()
    logger.handlers = []
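# `reset_wandb_env` above is assumed to clear run-specific WANDB_* environment
# variables so that each cross-validation fold gets a fresh W&B run when several
# runs are started from one process. A sketch under that assumption:
import os

def reset_wandb_env():
    # Keep project/entity/credentials, drop everything tied to the previous run.
    exclude = {"WANDB_PROJECT", "WANDB_ENTITY", "WANDB_API_KEY"}
    for key in list(os.environ):
        if key.startswith("WANDB_") and key not in exclude:
            del os.environ[key]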
def train(args):
    # Init wandb
    run = wandb.init(name=args.save_dir[len('../runs/'):], config=args,
                     project='sign-language-recognition')

    # Create directory for model checkpoints and log
    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)

    # Save args
    with open(os.path.join(args.save_dir, 'args.json'), 'w') as f:
        json.dump(vars(args), f, sort_keys=True, indent=2)

    # Logger
    logger = create_logger(args.save_dir)

    # Set gpu
    if torch.cuda.is_available():
        i = get_free_gpu()
        device = get_device(gpu=i)
    else:
        device = 'cpu'
    logger.info('using device: {}'.format(device))

    # Prepare early stop
    stopped = False
    best_epoch = 0
    best_loss = torch.Tensor([float('Inf')])

    # Data
    if args.freeze_vgg:
        real_batch_size = 3
    else:
        real_batch_size = 2  # can't fit more into gpu memory
    json_file = os.path.join(args.data_path, 'WLASL_v0.3.json')
    videos_folder = os.path.join(args.data_path, 'videos')
    keypoints_folder = os.path.join(args.data_path, 'keypoints')
    train_transforms = transforms.Compose([videotransforms.RandomCrop(224)])
    val_transforms = train_transforms

    # Debug data
    if args.debug_dataset:
        train_dataset = WLASL(json_file=json_file,
                              videos_folder=videos_folder,
                              keypoints_folder=keypoints_folder,
                              transforms=train_transforms,
                              split='train',
                              subset=args.subset)
        train_dl = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=real_batch_size,
            sampler=DebugSampler(args.debug_dataset, len(train_dataset)))
        val_dl = train_dl
    else:
        train_dataset = WLASL(json_file=json_file,
                              videos_folder=videos_folder,
                              keypoints_folder=keypoints_folder,
                              transforms=train_transforms,
                              split='train',
                              subset=args.subset)
        train_dl = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=real_batch_size,
                                               shuffle=True)
        val_dataset = WLASL(json_file=json_file,
                            videos_folder=videos_folder,
                            keypoints_folder=keypoints_folder,
                            transforms=val_transforms,
                            split='val',
                            subset=args.subset)
        val_dl = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=real_batch_size,
                                             shuffle=True)
    logger.info('data loaded')

    # Model, loss, optimizer
    m = Conv2dRNN(args).to(device)
    optimizer = torch.optim.Adam(m.parameters(), lr=args.lr)
    criterion = nn.CrossEntropyLoss()

    # Resume train
    start_epoch = 0
    if args.resume_train:
        checkpoint = torch.load(os.path.join(args.save_dir, 'checkpoint.pt.tar'),
                                map_location=torch.device('cpu'))
        best_epoch = checkpoint['epoch']
        m.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        m = m.to(device)
        best_loss = checkpoint['best_val_loss']
        start_epoch = best_epoch + 1
        # Change learning rate
        for g in optimizer.param_groups:
            g['lr'] = args.lr
        logger.info('Resuming training from epoch {} with best loss {:.4f}'.format(
            start_epoch, best_loss))

    # learning rate scheduler
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,
        factor=args.lr_schedule_factor,
        patience=args.lr_schedule_patience,
        threshold=args.lr_schedule_threshold)

    # Watch model with wandb
    run.watch(m, log='all', log_freq=5)

    # Print args
    logger.info('using args: \n' + json.dumps(vars(args), sort_keys=True, indent=2))

    # Train loop
    for t in range(args.n_epochs):
        t += start_epoch

        # Train
        losses = AverageMeter()
        batch_time = AverageMeter()
        m.train()
        start_t = time.time()
        for i, batch in enumerate(train_dl):
            # Run the forward pass multiple times and accumulate gradient
            # (to be able to use large batch size)
            X = batch['X'].to(device)
            label = batch['label'].to(device)

            # [per frame logits, mean of all frames logits]
            logits = m(X)

            # Create label for each logit
            label = torch.cat([l.repeat(logits.shape[1], 1) for l in label], dim=0)

            # Squeeze time sequence and batch into one dimension
            logits = logits.reshape(logits.shape[0] * logits.shape[1], logits.shape[2])

            loss = criterion(logits, label.squeeze())
            loss.backward()
            losses.update(loss.item())

            if (i % (args.batch_size // real_batch_size)) == 0:
                # Optimize with accumulated gradient
                optimizer.step()
                optimizer.zero_grad()

            batch_time.update(time.time() - start_t)
            start_t = time.time()
        train_loss = losses.avg

        # Validate
        with torch.no_grad():
            top1 = AverageMeter()
            top5 = AverageMeter()
            top10 = AverageMeter()
            losses = AverageMeter()
            m.eval()
            for batch in val_dl:
                X = batch['X'].to(device)
                label = batch['label'].to(device)

                # [per frame logits, mean of all frames logits]
                logits = m(X)

                # Create label for each logit
                label = torch.cat([l.repeat(logits.shape[1], 1) for l in label], dim=0)

                # Squeeze time sequence and batch into one dimension
                logits = logits.reshape(logits.shape[0] * logits.shape[1], logits.shape[2])

                losses.update(criterion(logits, label.squeeze()).item())

                # Update metrics
                acc1, acc5, acc10 = topk_accuracy(logits, label, topk=(1, 5, 10))
                top1.update(acc1.item())
                top5.update(acc5.item())
                top10.update(acc10.item())
            val_loss = losses.avg

        # Save best model
        if val_loss < best_loss:
            best_loss, best_epoch = val_loss, t
            save_best(args, t, m, optimizer, best_loss)

        # Check early stop
        if t >= best_epoch + args.early_stop:
            logger.info('EARLY STOP')
            break

        # Log info
        logger.info(
            'epoch: {} train loss: {:.4f} val loss: {:.4f} top1acc {:.4f} top5acc {:.4f} '
            'top10acc {:.4f} lr: {:.2e} time per batch {:.1f} s'.format(
                t + 1, train_loss, val_loss, top1.avg, top5.avg, top10.avg,
                optimizer.param_groups[0]['lr'], batch_time.avg))

        # Wandb log
        run.log({
            'train_loss': train_loss,
            'val_loss': val_loss,
            'top1_acc': top1.avg,
            'top5_acc': top5.avg,
            'top10_acc': top10.avg,
            'lr': optimizer.param_groups[0]['lr']
        })

        # Scheduler step
        if args.use_lr_scheduler:
            scheduler.step(val_loss)
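# `topk_accuracy` above is assumed to follow the usual torchvision-style helper:
# for each requested k, the percentage of samples whose true class appears among
# the k highest logits. A sketch under that assumption:
import torch

def topk_accuracy(logits, target, topk=(1,)):
    maxk = max(topk)
    batch_size = target.size(0)
    # Indices of the maxk largest logits per sample, transposed to (maxk, batch).
    _, pred = logits.topk(maxk, dim=1, largest=True, sorted=True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))
    res = []
    for k in topk:
        correct_k = correct[:k].reshape(-1).float().sum(0)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res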