def one_fold(fold_int, is_nine_folds):
    # Select the 9- or 16-emotion fold and its label count.
    fold_id = str(fold_int)
    if is_nine_folds:
        fold_path = 'data/Folds_9_Emotions/fold_' + fold_id
        num_labels = 9
    else:
        fold_path = 'data/Folds/fold_' + fold_id
        num_labels = 16

    vocab_size = 5000
    pad_len = 30
    batch_size = 64
    embedding_dim = 200
    hidden_dim = 600

    es = EarlyStop(2)
    word2id, id2word = build_vocab(fold_path, vocab_size, use_unk=True)
    train_data = DataSet(os.path.join(fold_path, 'train.csv'),
                         pad_len, word2id, num_labels)
    train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
    test_data = DataSet(os.path.join(fold_path, 'test.csv'),
                        pad_len, word2id, num_labels)
    test_loader = DataLoader(test_data, batch_size=batch_size)

    model = AttentionLSTMClassifier(embedding_dim, hidden_dim, vocab_size,
                                    word2id, num_labels, batch_size)
    model.load_glove_embedding(id2word)
    model.cuda()

    optimizer = optim.Adam(model.parameters())
    loss_criterion = nn.MSELoss()

    # Note: Variable(..., volatile=True) and loss.data[0] are legacy
    # PyTorch 0.3 idioms (torch.no_grad() / loss.item() in later versions).
    for epoch in range(4):
        print('Epoch:', epoch, '===================================')
        train_loss = 0
        for i, (data, seq_len, label) in enumerate(train_loader):
            data, label, seq_len = sort_batch(data, label, seq_len.view(-1))
            y_pred = model(Variable(data).cuda(), seq_len)
            optimizer.zero_grad()
            loss = loss_criterion(y_pred, Variable(label).cuda())
            loss.backward()
            optimizer.step()
            train_loss += loss.data[0]

        pred_list = []
        gold_list = []
        test_loss = 0
        for i, (data, seq_len, label) in enumerate(test_loader):
            data, label, seq_len = sort_batch(data, label, seq_len.view(-1))
            y_pred = model(Variable(data, volatile=True).cuda(), seq_len)
            loss = loss_criterion(y_pred,
                                  Variable(label, volatile=True).cuda())
            test_loss += loss.data[0]
            pred_list.append(y_pred.data.cpu().numpy())
            gold_list.append(label.numpy())

        print("Train Loss: ", train_loss, " Evaluation: ", test_loss)
        es.new_loss(test_loss)
        if es.if_stop():
            print('Early stopping: model is starting to overfit')
            break
def one_fold(X_train, y_train, X_test, y_test):
    num_labels = NUM_CLASS
    vocab_size = 20000
    pad_len = 30
    batch_size = 100
    embedding_dim = 200
    hidden_dim = 400
    __use_unk = False

    es = EarlyStop(2)
    word2id, id2word = build_vocab(X_train, vocab_size, use_unk=__use_unk)
    train_data = DataSet(X_train, y_train, pad_len, word2id, num_labels,
                         use_unk=__use_unk)
    train_loader = DataLoader(train_data, batch_size=batch_size,
                              shuffle=False)
    test_data = DataSet(X_test, y_test, pad_len, word2id, num_labels,
                        use_unk=__use_unk)
    test_loader = DataLoader(test_data, batch_size=batch_size)

    model = AttentionLSTMClassifier(embedding_dim, hidden_dim, vocab_size,
                                    word2id, num_labels, batch_size,
                                    use_att=True)
    model.load_glove_embedding(id2word)
    model.cuda()

    optimizer = optim.Adam(model.parameters())
    loss_criterion = nn.MSELoss()

    for epoch in range(4):
        print('Epoch:', epoch, '===================================')
        train_loss = 0
        for i, (data, seq_len, label) in enumerate(train_loader):
            # print(i)
            data, label, seq_len = sort_batch(data, label, seq_len.view(-1))
            y_pred = model(Variable(data).cuda(), seq_len)
            optimizer.zero_grad()
            loss = loss_criterion(y_pred, Variable(label).cuda())
            loss.backward()
            optimizer.step()
            train_loss += loss.data[0]

        pred_list = []
        gold_list = []
        test_loss = 0
        for i, (data, seq_len, label) in enumerate(test_loader):
            data, label, seq_len = sort_batch(data, label, seq_len.view(-1))
            y_pred = model(Variable(data, volatile=True).cuda(), seq_len)
            loss = loss_criterion(y_pred,
                                  Variable(label, volatile=True).cuda())
            test_loss += loss.data[0]
            pred_list.append(y_pred.data.cpu().numpy())
            gold_list.append(label.numpy())

        print("Train Loss: ", train_loss, " Evaluation: ", test_loss)
        es.new_loss(test_loss)
        if es.if_stop():
            print('Early stopping: model is starting to overfit')
            break

    return np.concatenate(pred_list, axis=0), np.concatenate(gold_list, axis=0)
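# EarlyStop, build_vocab, DataSet, sort_batch, and AttentionLSTMClassifier
# are project-local helpers that are not defined in this section. As a
# point of reference, a minimal EarlyStop consistent with the
# new_loss()/if_stop() call sites above might look like this (a sketch,
# not the project's actual implementation):
class EarlyStop(object):
    """Signal a stop once the tracked loss has failed to improve for
    `patience` consecutive calls to new_loss()."""

    def __init__(self, patience):
        self.patience = patience
        self.best_loss = float('inf')
        self.num_bad_epochs = 0

    def new_loss(self, loss):
        # Record a new validation loss and update the patience counter.
        if loss < self.best_loss:
            self.best_loss = loss
            self.num_bad_epochs = 0
        else:
            self.num_bad_epochs += 1

    def if_stop(self):
        # True once the loss has stagnated for `patience` updates.
        return self.num_bad_epochs >= self.patience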
def one_fold(fold_int, is_nine_folds):
    fold_id = str(fold_int)
    if is_nine_folds:
        fold_path = 'data/Folds_9_Emotions/fold_' + fold_id
        num_labels = 9
    else:
        fold_path = 'data/Folds/fold_' + fold_id
        num_labels = 16

    vocab_size = 5000
    pad_len = 30
    batch_size = 64
    hidden_dim = 600

    es = EarlyStop(2)
    word2id, id2word = build_vocab(fold_path, vocab_size, use_unk=True)
    # Bag-of-words embeddings are one-hot, so the embedding dimension
    # equals the vocabulary size.
    embedding_dim = len(word2id)
    train_data = DataSet(os.path.join(fold_path, 'train.csv'),
                         pad_len, word2id, num_labels)
    train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
    test_data = DataSet(os.path.join(fold_path, 'test.csv'),
                        pad_len, word2id, num_labels)
    test_loader = DataLoader(test_data, batch_size=batch_size)

    model = AttentionLSTMClassifier(embedding_dim, hidden_dim, vocab_size,
                                    word2id, num_labels, batch_size)
    model.load_bog_embedding(word2id)
    model.cuda()

    # Only optimize parameters that require gradients, skipping any
    # frozen layers.
    optimizer = optim.Adam(
        filter(lambda p: p.requires_grad, model.parameters()))
    loss_criterion = nn.BCELoss()

    for epoch in range(4):
        print('Epoch:', epoch, '===================================')
        train_loss = 0
        for i, (data, seq_len, label) in enumerate(train_loader):
            data, label, seq_len = sort_batch(data, label, seq_len.view(-1))
            y_pred = model(Variable(data).cuda(), seq_len)
            optimizer.zero_grad()
            loss = loss_criterion(y_pred, Variable(label).cuda())
            loss.backward()
            optimizer.step()
            train_loss += loss.data[0]

        pred_list = []
        gold_list = []
        test_loss = 0
        for i, (data, seq_len, label) in enumerate(test_loader):
            data, label, seq_len = sort_batch(data, label, seq_len.view(-1))
            y_pred = model(Variable(data, volatile=True).cuda(), seq_len)
            loss = loss_criterion(y_pred,
                                  Variable(label, volatile=True).cuda())
            test_loss += loss.data[0]
            pred_list.append(y_pred.data.cpu().numpy())
            gold_list.append(label.numpy())

        print("Train Loss: ", train_loss, " Evaluation: ", test_loss)
        es.new_loss(test_loss)
        if es.if_stop():
            print('Early stopping: model is starting to overfit')
            break

    # Sweep the decision threshold and collect macro-/micro-F1 scores.
    f_ma = []
    f_mi = []
    for threshold in range(0, 100, 5):
        threshold /= 100
        tmp = CalculateFM(np.concatenate(pred_list, axis=0),
                          np.concatenate(gold_list, axis=0),
                          threshold=threshold)
        f_ma.append(tmp['MacroFM'])
        f_mi.append(tmp['MicroFM'])

    return f_ma, f_mi
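# sort_batch is also project-local. The call sites above pass (data, label,
# seq_len) and expect the batch back in descending-length order, which is
# what torch.nn.utils.rnn.pack_padded_sequence required at the time. A
# sketch under that assumption (the project's own version may differ):
def sort_batch(data, label, seq_len):
    """Reorder a padded batch by sequence length, longest first."""
    sorted_len, sorted_idx = seq_len.sort(0, descending=True)
    return data[sorted_idx], label[sorted_idx], sorted_len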
def __init__(self, config):
    self.config = config

    # start tensorboard summary writer
    self.writer = SummaryWriter(config.log_path)

    # load training dataset generator
    if self.config.random_flip or self.config.random_crop:
        self.train_loader = LMDBDataLoaderAugmenter(self.config,
                                                    self.config.train_source)
    else:
        self.train_loader = LMDBDataLoader(self.config,
                                           self.config.train_source)
    print(f"Training with {len(self.train_loader.dataset)} images.")

    # load validation dataset generator if a validation dataset is given
    if self.config.val_source is not None:
        self.val_loader = LMDBDataLoader(self.config,
                                         self.config.val_source, False)

    # create the model
    self.img2pose_model = img2poseModel(
        depth=self.config.depth,
        min_size=self.config.min_size,
        max_size=self.config.max_size,
        device=self.config.device,
        pose_mean=self.config.pose_mean,
        pose_stddev=self.config.pose_stddev,
        distributed=self.config.distributed,
        gpu=self.config.gpu,
        threed_68_points=np.load(self.config.threed_68_points),
        threed_5_points=np.load(self.config.threed_5_points),
    )

    # optimizer for the backbone and heads
    if self.config.optimizer == "Adam":
        self.optimizer = optim.Adam(
            self.img2pose_model.fpn_model.parameters(),
            lr=self.config.lr,
            weight_decay=self.config.weight_decay,
        )
    elif self.config.optimizer == "SGD":
        self.optimizer = optim.SGD(
            self.img2pose_model.fpn_model.parameters(),
            lr=self.config.lr,
            weight_decay=self.config.weight_decay,
            momentum=self.config.momentum,
        )
    else:
        raise Exception("No optimizer found; please select either SGD or Adam.")

    # load a model together with its optimizer state so that training
    # can continue where it stopped
    if self.config.resume_path:
        print(f"Resuming training from {self.config.resume_path}")
        load_model(
            self.img2pose_model.fpn_model,
            self.config.resume_path,
            model_only=False,
            optimizer=self.optimizer,
            cpu_mode=str(self.config.device) == "cpu",
        )

    # load a pretrained model without loading the optimizer
    if self.config.pretrained_path:
        print(f"Loading pretrained weights from {self.config.pretrained_path}")
        load_model(
            self.img2pose_model.fpn_model,
            self.config.pretrained_path,
            model_only=True,
            cpu_mode=str(self.config.device) == "cpu",
        )

    # save configuration to file for easier retrieval later
    print(self.config)
    self.save_file(self.config, "config.txt")

    # save optimizer config to file for easier retrieval later
    print(self.optimizer)
    self.save_file(self.optimizer, "optimizer.txt")

    self.tensorboard_loss_every = max(len(self.train_loader) // 100, 1)
    # self.evaluate_every = max(len(self.train_loader) // 1, 1)

    # reduce the learning rate when the validation loss stops decreasing
    if self.config.lr_plateau:
        self.scheduler = ReduceLROnPlateau(
            self.optimizer,
            mode="min",
            factor=0.1,
            patience=3,
            verbose=True,
            threshold=0.001,
            cooldown=1,
            min_lr=0.00001,
        )

    # stop training before the defined number of epochs if the validation
    # loss stops decreasing
    if self.config.early_stop:
        self.early_stop = EarlyStop(mode="min", patience=5)
class TrainingAgent:
    config: Dict[str, Any] = None
    train_loader = None
    test_loader = None
    train_sampler = None
    num_classes: int = None
    network: torch.nn.Module = None
    optimizer: torch.optim.Optimizer = None
    scheduler = None
    loss = None
    output_filename: Path = None
    checkpoint = None

    def __init__(self,
                 config_path: Path,
                 device: str,
                 output_path: Path,
                 data_path: Path,
                 checkpoint_path: Path,
                 resume: Path = None,
                 save_freq: int = 25,
                 gpu: int = None,
                 ngpus_per_node: int = 0,
                 world_size: int = -1,
                 rank: int = -1,
                 dist: bool = False,
                 mpd: bool = False,
                 dist_url: str = None,
                 dist_backend: str = None) -> None:
        self.gpu = gpu
        self.mpd = mpd
        self.dist = dist
        self.rank = rank
        self.best_acc1 = 0.
        self.start_epoch = 0
        self.start_trial = 0
        self.device = device
        self.resume = resume
        self.dist_url = dist_url
        self.save_freq = save_freq
        self.world_size = world_size
        self.dist_backend = dist_backend
        self.ngpus_per_node = ngpus_per_node
        self.data_path = data_path
        self.output_path = output_path
        self.checkpoint_path = checkpoint_path
        self.load_config(config_path, data_path)
        print("AdaS: Experiment Configuration")
        print("-" * 45)
        for k, v in self.config.items():
            if isinstance(v, list) or isinstance(v, dict):
                print(f"    {k:<20} {v}")
            else:
                print(f"    {k:<20} {v:<20}")
        print("-" * 45)

    def load_config(self, config_path: Path, data_path: Path) -> None:
        with config_path.open() as f:
            self.config = config = parse_config(yaml.load(f))
        if self.device == 'cpu':
            warnings.warn("Using CPU will be slow")
        elif self.dist:
            if self.gpu is not None:
                config['mini_batch_size'] = int(
                    config['mini_batch_size'] / self.ngpus_per_node)
                config['num_workers'] = int(
                    (config['num_workers'] + self.ngpus_per_node - 1) /
                    self.ngpus_per_node)
        self.train_loader, self.train_sampler, \
            self.test_loader, self.num_classes = get_data(
                name=config['dataset'],
                root=data_path,
                mini_batch_size=config['mini_batch_size'],
                num_workers=config['num_workers'],
                dist=self.dist)
        self.criterion = torch.nn.CrossEntropyLoss().cuda(self.gpu) if \
            config['loss'] == 'cross_entropy' else None
        if np.less(float(config['early_stop_threshold']), 0):
            print("AdaS: Notice: early stop will not be used as it was " +
                  f"set to {config['early_stop_threshold']}, " +
                  "training till completion")
        elif config['optimizer'] != 'SGD' and \
                config['scheduler'] != 'AdaS':
            print("AdaS: Notice: early stop will not be used as it is not " +
                  "SGD with AdaS, training till completion")
            config['early_stop_threshold'] = -1.
        self.early_stop = EarlyStop(
            patience=int(config['early_stop_patience']),
            threshold=float(config['early_stop_threshold']))
        cudnn.benchmark = True
        if self.resume is not None:
            if self.gpu is None:
                self.checkpoint = torch.load(str(self.resume))
            else:
                self.checkpoint = torch.load(
                    str(self.resume), map_location=f'cuda:{self.gpu}')
            self.start_epoch = self.checkpoint['epoch']
            self.start_trial = self.checkpoint['trial']
            self.best_acc1 = self.checkpoint['best_acc1']
            print(f'Resuming config for trial {self.start_trial} at ' +
                  f'epoch {self.start_epoch}')
        # self.reset()

    def reset(self, learning_rate: float) -> None:
        self.performance_statistics = dict()
        self.network = get_network(name=self.config['network'],
                                   num_classes=self.num_classes)
        self.metrics = Metrics(list(self.network.parameters()),
                               p=self.config['p'])
        # TODO add other parallelisms
        if self.device == 'cpu':
            print("Resetting cpu-based network")
        elif self.dist:
            if self.gpu is not None:
                torch.cuda.set_device(self.gpu)
                self.network.cuda(self.gpu)
                self.network = torch.nn.parallel.DistributedDataParallel(
                    self.network, device_ids=[self.gpu])
            else:
                self.network.cuda()
                self.network = torch.nn.parallel.DistributedDataParallel(
                    self.network)
        elif self.gpu is not None:
            torch.cuda.set_device(self.gpu)
            self.network = self.network.cuda(self.gpu)
        else:
            if isinstance(self.network, VGG):
                self.network.features = torch.nn.DataParallel(
                    self.network.features)
                self.network.cuda()
            else:
                self.network = torch.nn.DataParallel(self.network)
        self.optimizer, self.scheduler = get_optimizer_scheduler(
            optim_method=self.config['optimizer'],
            lr_scheduler=self.config['scheduler'],
            init_lr=learning_rate,
            net_parameters=self.network.parameters(),
            listed_params=list(self.network.parameters()),
            train_loader_len=len(self.train_loader),
            mini_batch_size=self.config['mini_batch_size'],
            max_epochs=self.config['max_epochs'],
            optimizer_kwargs=self.config['optimizer_kwargs'],
            scheduler_kwargs=self.config['scheduler_kwargs'])
        self.early_stop.reset()

    def train(self) -> None:
        if not isinstance(self.config['init_lr'], list):
            list_lr = [self.config['init_lr']]
        else:
            list_lr = self.config['init_lr']
        for learning_rate in list_lr:
            lr_output_path = self.output_path / f'lr-{learning_rate}'
            lr_output_path.mkdir(exist_ok=True, parents=True)
            for trial in range(self.start_trial, self.config['n_trials']):
                self.reset(learning_rate)
                if trial == self.start_trial and self.resume is not None:
                    print("Resuming Network/Optimizer")
                    self.network.load_state_dict(
                        self.checkpoint['state_dict_network'])
                    self.optimizer.load_state_dict(
                        self.checkpoint['state_dict_optimizer'])
                    if not isinstance(self.scheduler, AdaS) \
                            and self.scheduler is not None:
                        self.scheduler.load_state_dict(
                            self.checkpoint['state_dict_scheduler'])
                    else:
                        self.metrics.historical_metrics = \
                            self.checkpoint['historical_metrics']
                    epochs = range(self.start_epoch,
                                   self.config['max_epochs'])
                    self.output_filename = self.checkpoint['output_filename']
                    self.performance_statistics = self.checkpoint[
                        'performance_statistics']
                else:
                    epochs = range(0, self.config['max_epochs'])
                    self.output_filename = "results_" +\
                        f"date={datetime.now().strftime('%Y-%m-%d-%H-%M-%S')}_" +\
                        f"trial={trial}_" +\
                        f"{self.config['network']}_" +\
                        f"{self.config['dataset']}_" +\
                        f"{self.config['optimizer']}" +\
                        '_'.join([f"{k}={v}" for k, v in
                                  self.config['optimizer_kwargs'].items()]) +\
                        f"_{self.config['scheduler']}" +\
                        '_'.join([f"{k}={v}" for k, v in
                                  self.config['scheduler_kwargs'].items()]) +\
                        f"_LR={learning_rate}" +\
                        ".xlsx".replace(' ', '-')
                self.output_filename = \
                    str(lr_output_path / self.output_filename)
                self.run_epochs(trial, epochs)

    def run_epochs(self, trial: int, epochs: List[int]) -> None:
        for epoch in epochs:
            if self.dist:
                self.train_sampler.set_epoch(epoch)
            start_time = time.time()
            train_loss, (train_acc1, train_acc5) = \
                self.epoch_iteration(trial, epoch)
            test_loss, (test_acc1, test_acc5) = self.validate(epoch)
            end_time = time.time()
            if isinstance(self.scheduler, StepLR):
                self.scheduler.step()
            total_time = time.time()
            scheduler_string = f" w/ {self.config['scheduler']}" if \
                self.scheduler is not None else ''
            print(
                f"{self.config['optimizer']}{scheduler_string} " +
                f"on {self.config['dataset']}: " +
                f"T {trial + 1}/{self.config['n_trials']} | " +
                f"E {epoch + 1}/{epochs[-1] + 1} Ended | " +
                "E Time: {:.3f}s | ".format(end_time - start_time) +
                "~Time Left: {:.3f}s | ".format(
                    (total_time - start_time) * (epochs[-1] - epoch)),
                "Train Loss: {:.4f}% | Train Acc. {:.4f}% | ".format(
                    train_loss, train_acc1 * 100) +
                "Test Loss: {:.4f}% | Test Acc. {:.4f}%".format(
                    test_loss, test_acc1 * 100))
            df = pd.DataFrame(data=self.performance_statistics)
            df.to_excel(self.output_filename)
            if self.early_stop(train_loss):
                print("AdaS: Early stop activated.")
                break
            if not self.mpd or \
                    (self.mpd and self.rank % self.ngpus_per_node == 0):
                data = {
                    'epoch': epoch + 1,
                    'trial': trial,
                    'config': self.config,
                    'state_dict_network': self.network.state_dict(),
                    'state_dict_optimizer': self.optimizer.state_dict(),
                    'state_dict_scheduler':
                        self.scheduler.state_dict()
                        if not isinstance(self.scheduler, AdaS) and
                        self.scheduler is not None else None,
                    'best_acc1': self.best_acc1,
                    'performance_statistics': self.performance_statistics,
                    'output_filename': Path(self.output_filename).name,
                    'historical_metrics': self.metrics.historical_metrics
                }
                if epoch % self.save_freq == 0:
                    filename = f'trial_{trial}_epoch_{epoch}.pth.tar'
                    torch.save(data, str(self.checkpoint_path / filename))
                if np.greater(test_acc1, self.best_acc1):
                    self.best_acc1 = test_acc1
                    torch.save(data,
                               str(self.checkpoint_path / 'best.pth.tar'))
                torch.save(data, str(self.checkpoint_path / 'last.pth.tar'))

    def epoch_iteration(self, trial: int, epoch: int):
        # logging.info(f"Adas: Train: Epoch: {epoch}")
        # global net, performance_statistics, metrics, adas, config
        self.network.train()
        train_loss = 0
        top1 = AverageMeter()
        top5 = AverageMeter()
        # correct = 0
        # total = 0
        """train CNN architecture"""
        for batch_idx, (inputs, targets) in enumerate(self.train_loader):
            # start = time.time()
            # print(f'{batch_idx} / {len(train_loader)}')
            if self.gpu is not None:
                inputs = inputs.cuda(self.gpu, non_blocking=True)
            if self.device == 'cuda':
                targets = targets.cuda(self.gpu, non_blocking=True)
            # inputs, targets = inputs.to(self.device), targets.to(self.device)
            if isinstance(self.scheduler, CosineAnnealingWarmRestarts):
                self.scheduler.step(epoch + batch_idx /
                                    len(self.train_loader))
            self.optimizer.zero_grad()
            if isinstance(self.optimizer, SLS) or \
                    isinstance(self.optimizer, AdaSLS):
                def closure():
                    outputs = self.network(inputs)
                    loss = self.criterion(outputs, targets)
                    return loss, outputs
                loss, outputs = self.optimizer.step(closure=closure)
            else:
                outputs = self.network(inputs)
                loss = self.criterion(outputs, targets)
                loss.backward()
                if isinstance(self.scheduler, AdaS):
                    self.optimizer.step(self.metrics.layers_index_todo,
                                        self.scheduler.lr_vector)
                elif isinstance(self.optimizer, SPS):
                    self.optimizer.step(loss=loss)
                else:
                    self.optimizer.step()
            train_loss += loss.item()
            # _, predicted = outputs.max(1)
            # total += targets.size(0)
            # correct += predicted.eq(targets).sum().item()
            acc1, acc5 = accuracy(outputs, targets, (1, 5))
            top1.update(acc1[0], inputs.size(0))
            top5.update(acc5[0], inputs.size(0))
            if isinstance(self.scheduler, OneCycleLR):
                self.scheduler.step()
        self.performance_statistics[f'train_acc1_epoch_{epoch}'] = \
            top1.avg.cpu().item() / 100.
        self.performance_statistics[f'train_acc5_epoch_{epoch}'] = \
            top5.avg.cpu().item() / 100.
        self.performance_statistics[f'train_loss_epoch_{epoch}'] = \
            train_loss / (batch_idx + 1)
        io_metrics = self.metrics.evaluate(epoch)
        self.performance_statistics[f'in_S_epoch_{epoch}'] = \
            io_metrics.input_channel_S
        self.performance_statistics[f'out_S_epoch_{epoch}'] = \
            io_metrics.output_channel_S
        self.performance_statistics[f'fc_S_epoch_{epoch}'] = \
            io_metrics.fc_S
        self.performance_statistics[f'in_rank_epoch_{epoch}'] = \
            io_metrics.input_channel_rank
        self.performance_statistics[f'out_rank_epoch_{epoch}'] = \
            io_metrics.output_channel_rank
        self.performance_statistics[f'fc_rank_epoch_{epoch}'] = \
            io_metrics.fc_rank
        self.performance_statistics[f'in_condition_epoch_{epoch}'] = \
            io_metrics.input_channel_condition
        self.performance_statistics[f'out_condition_epoch_{epoch}'] = \
            io_metrics.output_channel_condition
        # if GLOBALS.ADAS is not None:
        if isinstance(self.scheduler, AdaS):
            lrmetrics = self.scheduler.step(epoch, self.metrics)
            self.performance_statistics[f'rank_velocity_epoch_{epoch}'] = \
                lrmetrics.rank_velocity
            self.performance_statistics[f'learning_rate_epoch_{epoch}'] = \
                lrmetrics.r_conv
        else:
            # if GLOBALS.CONFIG['optim_method'] == 'SLS' or \
            #         GLOBALS.CONFIG['optim_method'] == 'SPS':
            if isinstance(self.optimizer, SLS) or \
                    isinstance(self.optimizer, SPS) or \
                    isinstance(self.optimizer, AdaSLS):
                self.performance_statistics[
                    f'learning_rate_epoch_{epoch}'] = \
                    self.optimizer.state['step_size']
            else:
                self.performance_statistics[
                    f'learning_rate_epoch_{epoch}'] = \
                    self.optimizer.param_groups[0]['lr']
        return train_loss / (batch_idx + 1), (top1.avg.cpu().item() / 100.,
                                              top5.avg.cpu().item() / 100.)

    def validate(self, epoch: int):
        self.network.eval()
        test_loss = 0
        # correct = 0
        # total = 0
        top1 = AverageMeter()
        top5 = AverageMeter()
        with torch.no_grad():
            for batch_idx, (inputs, targets) in enumerate(self.test_loader):
                # inputs, targets = \
                #     inputs.to(self.device), targets.to(self.device)
                if self.gpu is not None:
                    inputs = inputs.cuda(self.gpu, non_blocking=True)
                if self.device == 'cuda':
                    targets = targets.cuda(self.gpu, non_blocking=True)
                outputs = self.network(inputs)
                loss = self.criterion(outputs, targets)
                test_loss += loss.item()
                # _, predicted = outputs.max(1)
                # total += targets.size(0)
                # correct += predicted.eq(targets).sum().item()
                acc1, acc5 = accuracy(outputs, targets, topk=(1, 5))
                top1.update(acc1[0], inputs.size(0))
                top5.update(acc5[0], inputs.size(0))
        # Save checkpoint.
        # acc = 100. * correct / total
        # if acc > self.best_acc:
        #     # print('Adas: Saving checkpoint...')
        #     state = {
        #         'net': self.network.state_dict(),
        #         'acc': acc,
        #         'epoch': epoch + 1,
        #     }
        #     if not isinstance(self.scheduler, AdaS):
        #         state['historical_io_metrics'] = \
        #             self.metrics.historical_metrics
        #     torch.save(state, str(self.checkpoint_path / 'ckpt.pth'))
        #     self.best_acc = acc
        self.performance_statistics[f'test_acc1_epoch_{epoch}'] = (
            top1.avg.cpu().item() / 100.)
        self.performance_statistics[f'test_acc5_epoch_{epoch}'] = (
            top5.avg.cpu().item() / 100.)
        self.performance_statistics[f'test_loss_epoch_{epoch}'] = \
            test_loss / (batch_idx + 1)
        return test_loss / (batch_idx + 1), (top1.avg.cpu().item() / 100,
                                             top5.avg.cpu().item() / 100)
def main(args: APNamespace):
    root_path = Path(args.root).expanduser()
    config_path = Path(args.config).expanduser()
    data_path = root_path / Path(args.data).expanduser()
    output_path = root_path / Path(args.output).expanduser()
    global checkpoint_path, config
    checkpoint_path = root_path / Path(args.checkpoint).expanduser()

    if not config_path.exists():
        # logging.critical(f"AdaS: Config path {config_path} does not exist")
        print(f"AdaS: Config path {config_path} does not exist")
        raise ValueError
    if not data_path.exists():
        print(f"AdaS: Data dir {data_path} does not exist, building")
        data_path.mkdir(exist_ok=True, parents=True)
    if not output_path.exists():
        print(f"AdaS: Output dir {output_path} does not exist, building")
        output_path.mkdir(exist_ok=True, parents=True)
    if not checkpoint_path.exists():
        if args.resume:
            print("AdaS: Cannot resume from checkpoint without specifying " +
                  "checkpoint dir")
            raise ValueError
        checkpoint_path.mkdir(exist_ok=True, parents=True)

    with config_path.open() as f:
        config = yaml.load(f)

    print("Adas: Argument Parser Options")
    print("-" * 45)
    print(f"    {'config':<20}: {args.config:<40}")
    print(f"    {'data':<20}: {str(Path(args.root) / args.data):<40}")
    print(f"    {'output':<20}: {str(Path(args.root) / args.output):<40}")
    print(f"    {'checkpoint':<20}: " +
          f"{str(Path(args.root) / args.checkpoint):<40}")
    print(f"    {'root':<20}: {args.root:<40}")
    print(f"    {'resume':<20}: {'True' if args.resume else 'False':<20}")
    print("\nAdas: Train: Config")
    print(f"    {'Key':<20} {'Value':<20}")
    print("-" * 45)
    for k, v in config.items():
        print(f"    {k:<20} {v:<20}")
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    print(f"AdaS: Pytorch device is set to {device}")
    global best_acc
    best_acc = 0  # best test accuracy

    start_epoch = 0  # start from epoch 0 or last checkpoint epoch
    if np.less(float(config['early_stop_threshold']), 0):
        print("AdaS: Notice: early stop will not be used as it was set to " +
              f"{config['early_stop_threshold']}, training till completion.")

    for trial in range(config['n_trials']):
        if config['lr_scheduler'] == 'AdaS':
            filename = \
                f"stats_{config['optim_method']}_AdaS_trial={trial}_" +\
                f"beta={config['beta']}_initlr={config['init_lr']}_" +\
                f"net={config['network']}_dataset={config['dataset']}.csv"
        else:
            filename = \
                f"stats_{config['optim_method']}_{config['lr_scheduler']}_" +\
                f"trial={trial}_initlr={config['init_lr']}" +\
                f"net={config['network']}_dataset={config['dataset']}.csv"
        Profiler.filename = output_path / filename

        # Data
        # logging.info("Adas: Preparing Data")
        train_loader, test_loader = get_data(
            root=data_path,
            dataset=config['dataset'],
            mini_batch_size=config['mini_batch_size'])
        global performance_statistics, net, metrics, adas
        performance_statistics = {}

        # logging.info("AdaS: Building Model")
        net = get_net(config['network'],
                      num_classes=10 if config['dataset'] == 'CIFAR10'
                      else 100 if config['dataset'] == 'CIFAR100'
                      else 1000 if config['dataset'] == 'ImageNet' else 10)
        metrics = Metrics(list(net.parameters()), p=config['p'])
        if config['lr_scheduler'] == 'AdaS':
            adas = AdaS(parameters=list(net.parameters()),
                        beta=config['beta'],
                        zeta=config['zeta'],
                        init_lr=float(config['init_lr']),
                        min_lr=float(config['min_lr']),
                        p=config['p'])
        else:
            adas = None  # no AdaS scheduler for this trial

        net = net.to(device)

        global criterion
        criterion = get_loss(config['loss'])

        optimizer, scheduler = get_optimizer_scheduler(
            net_parameters=net.parameters(),
            init_lr=float(config['init_lr']),
            optim_method=config['optim_method'],
            lr_scheduler=config['lr_scheduler'],
            train_loader_len=len(train_loader),
            max_epochs=int(config['max_epoch']))
        early_stop = EarlyStop(
            patience=int(config['early_stop_patience']),
            threshold=float(config['early_stop_threshold']))

        if device == 'cuda':
            net = torch.nn.DataParallel(net)
            cudnn.benchmark = True

        if args.resume:
            # Load checkpoint.
            print("Adas: Resuming from checkpoint...")
            checkpoint = torch.load(str(checkpoint_path / 'ckpt.pth'))
            # if checkpoint_path.is_dir():
            #     checkpoint = torch.load(str(checkpoint_path / 'ckpt.pth'))
            # else:
            #     checkpoint = torch.load(str(checkpoint_path))
            net.load_state_dict(checkpoint['net'])
            best_acc = checkpoint['acc']
            start_epoch = checkpoint['epoch']
            if adas is not None:
                metrics.historical_metrics = \
                    checkpoint['historical_io_metrics']

        # model_parameters = filter(lambda p: p.requires_grad,
        #                           net.parameters())
        # params = sum([np.prod(p.size()) for p in model_parameters])
        # print(params)
        epochs = range(start_epoch, start_epoch + config['max_epoch'])
        for epoch in epochs:
            start_time = time.time()
            # print(f"AdaS: Epoch {epoch}/{epochs[-1]} Started.")
            train_loss, train_accuracy, test_loss, test_accuracy = \
                epoch_iteration(train_loader, test_loader,
                                epoch, device, optimizer, scheduler)
            end_time = time.time()
            if config['lr_scheduler'] == 'StepLR':
                scheduler.step()
            total_time = time.time()
            print(
                f"AdaS: Trial {trial}/{config['n_trials'] - 1} | " +
                f"Epoch {epoch}/{epochs[-1]} Ended | " +
                "Total Time: {:.3f}s | ".format(total_time - start_time) +
                "Epoch Time: {:.3f}s | ".format(end_time - start_time) +
                "~Time Left: {:.3f}s | ".format(
                    (total_time - start_time) * (epochs[-1] - epoch)),
                "Train Loss: {:.4f}% | Train Acc. {:.4f}% | ".format(
                    train_loss, train_accuracy) +
                "Test Loss: {:.4f}% | Test Acc. {:.4f}%".format(
                    test_loss, test_accuracy))
            df = pd.DataFrame(data=performance_statistics)
            if config['lr_scheduler'] == 'AdaS':
                xlsx_name = \
                    f"{config['optim_method']}_AdaS_trial={trial}_" +\
                    f"beta={config['beta']}_initlr={config['init_lr']}_" +\
                    f"net={config['network']}_dataset={config['dataset']}.xlsx"
            else:
                xlsx_name = \
                    f"{config['optim_method']}_{config['lr_scheduler']}_" +\
                    f"trial={trial}_initlr={config['init_lr']}" +\
                    f"net={config['network']}_dataset={config['dataset']}.xlsx"
            df.to_excel(str(output_path / xlsx_name))
            if early_stop(train_loss):
                print("AdaS: Early stop activated.")
                break
    return
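# The AdaS training code above uses a different early-stop interface than
# the fold runners: it is constructed with patience and threshold, called
# directly with the training loss, and reset() between trials. A sketch
# consistent with those call sites (named differently here to avoid
# clashing with the class above; the real AdaS implementation may differ):
class ThresholdEarlyStop(object):
    def __init__(self, patience: int, threshold: float):
        self.patience = patience
        self.threshold = threshold
        self.reset()

    def reset(self) -> None:
        self.best_loss = float('inf')
        self.wait = 0

    def __call__(self, loss: float) -> bool:
        # A negative threshold disables early stopping (see load_config).
        if self.threshold < 0:
            return False
        # Count epochs whose improvement over the best loss falls below
        # the threshold; stop once `patience` such epochs occur in a row.
        if self.best_loss - loss > self.threshold:
            self.best_loss = loss
            self.wait = 0
        else:
            self.wait += 1
        return self.wait >= self.patience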
def one_fold(X_train, y_train, X_dev, y_dev, class_weight):
    num_labels = NUM_CLASS
    vocab_size = 20000
    pad_len = 40
    batch_size = 64
    embedding_dim = 200
    hidden_dim = 500
    __use_unk = False

    word2id, id2word = build_vocab(X_train, vocab_size)
    train_data = DataSet(X_train, y_train, pad_len, word2id, num_labels,
                         use_unk=__use_unk)
    train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
    dev_data = DataSet(X_dev, y_dev, pad_len, word2id, num_labels,
                       use_unk=__use_unk)
    dev_loader = DataLoader(dev_data, batch_size=batch_size, shuffle=False)
    # test_data = TestDataSet(X_test, pad_len, word2id, num_labels,
    #                         use_unk=__use_unk)
    # test_loader = DataLoader(test_data, batch_size=batch_size,
    #                          shuffle=False)

    model = AttentionLSTMClassifier(embedding_dim, hidden_dim, vocab_size,
                                    word2id, num_labels, batch_size,
                                    use_att=False)
    model.load_glove_embedding(id2word)
    model.cuda()

    es = EarlyStop(2)
    optimizer = optim.Adam(model.parameters())
    for epoch in range(30):
        print('Epoch:', epoch, '===================================')
        train_loss = 0
        for i, (data, seq_len, label) in enumerate(train_loader):
            weight = torch.FloatTensor(class_weight)  # re-weight
            weight_expanded = weight.expand(len(data), -1)
            loss_criterion = nn.BCELoss(weight=weight_expanded.cuda())
            # data, label, seq_len = sort_batch(data, label, seq_len.view(-1))
            y_pred = model(Variable(data).cuda(), seq_len)
            # roc_reward = roc_auc_score(label.numpy().argmax(axis=1),
            #                            y_pred.data.cpu().numpy()[:, 1])
            optimizer.zero_grad()
            loss = loss_criterion(y_pred, Variable(label).cuda())
            # * Variable(torch.FloatTensor([roc_reward])).cuda()
            loss.backward()
            optimizer.step()
            train_loss += loss.data[0]

        pred_list = []
        gold_list = []
        test_loss = 0
        for _, (_data, _seq_len, _label) in enumerate(dev_loader):
            data, label, seq_len = sort_batch(_data, _label,
                                              _seq_len.view(-1))
            y_pred = model(Variable(data, volatile=True).cuda(), seq_len)
            weight = torch.FloatTensor(class_weight)  # re-weight
            weight_expanded = weight.expand(len(data), -1)
            loss_criterion = nn.BCELoss(weight=weight_expanded.cuda())
            loss = loss_criterion(y_pred,
                                  Variable(label, volatile=True).cuda())
            test_loss += loss.data[0]
            pred_list.append(y_pred.data.cpu().numpy())
            gold_list.append(label.numpy())

        pred_list_2 = np.concatenate(pred_list, axis=0)[:, 1]
        pred_list = np.concatenate(pred_list, axis=0).argmax(axis=1)
        gold_list = np.concatenate(gold_list, axis=0).argmax(axis=1)
        roc = roc_auc_score(gold_list, pred_list_2)
        print('roc:', roc)
        a = accuracy_score(gold_list, pred_list)
        p = precision_score(gold_list, pred_list, average='binary')
        r = recall_score(gold_list, pred_list, average='binary')
        f1 = f1_score(gold_list, pred_list, average='binary')
        print('accuracy:', a, 'precision_score:', p, 'recall:', r, 'f1:', f1)
        print("Train Loss: ", train_loss, " Evaluation: ", test_loss)
        es.new_loss(test_loss)
        if es.if_stop():
            print('Early stopping: model is starting to overfit')
            break

    return gold_list, pred_list
def one_fold(fold_path):
    vocab_size = 20000
    pad_len = 30
    batch_size = 64
    embedding_dim = 200
    hidden_dim = 800
    num_labels = NUM_CLASS

    # Split the training file into train/dev with a stratified shuffle split.
    X, y = cbet_data(os.path.join(fold_path, 'train.csv'))
    train_index, dev_index = stratified_shuffle_split(X, y)
    y = np.asarray(y)
    X_train, X_dev = [X[i] for i in train_index], [X[i] for i in dev_index]
    y_train, y_dev = y[train_index], y[dev_index]

    word2id, id2word = build_vocab(X_train, vocab_size)
    # DataSet(__X, __y, __pad_len, __word2id, __num_labels)
    train_data = DataSet(X_train, y_train, pad_len, word2id, num_labels)
    train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
    dev_data = DataSet(X_dev, y_dev, pad_len, word2id, num_labels)
    dev_loader = DataLoader(dev_data, batch_size=batch_size, shuffle=True)

    X_test, y_test = cbet_data(os.path.join(fold_path, 'test.csv'))
    test_data = DataSet(X_test, y_test, pad_len, word2id, num_labels)
    test_loader = DataLoader(test_data, batch_size=batch_size)

    model = AttentionLSTMClassifier(embedding_dim, hidden_dim, vocab_size,
                                    word2id, num_labels, batch_size,
                                    use_att=True, soft_last=True)
    model.load_glove_embedding(id2word)
    model.cuda()

    optimizer = optim.Adam(model.parameters())
    loss_criterion = nn.BCELoss()
    es = EarlyStop(2)
    old_model = None
    for epoch in range(10):
        print('Epoch:', epoch, '===================================')
        train_loss = 0
        for i, (data, seq_len, label) in enumerate(train_loader):
            data, label, seq_len = sort_batch(data, label, seq_len.view(-1))
            y_pred = model(Variable(data).cuda(), seq_len)
            optimizer.zero_grad()
            loss = loss_criterion(y_pred, Variable(label).cuda())
            loss.backward()
            optimizer.step()
            train_loss += loss.data[0]

        # evaluation on the dev set
        pred_list = []
        gold_list = []
        test_loss = 0
        for i, (data, seq_len, label) in enumerate(dev_loader):
            data, label, seq_len = sort_batch(data, label, seq_len.view(-1))
            y_pred = model(Variable(data, volatile=True).cuda(), seq_len)
            loss = loss_criterion(y_pred,
                                  Variable(label, volatile=True).cuda())
            test_loss += loss.data[0]
            pred_list.append(y_pred.data.cpu().numpy())
            gold_list.append(label.numpy())

        # snapshot the model after each epoch
        # (restored below when early stopping fires)
        old_model = copy.deepcopy(model)
        print("Train Loss: ", train_loss, " Evaluation: ", test_loss)
        es.new_loss(test_loss)
        if es.if_stop():
            print('Early stopping: model is starting to overfit')
            del model
            model = old_model
            break

    # testing
    pred_list = []
    gold_list = []
    test_loss = 0
    for i, (data, seq_len, label) in enumerate(test_loader):
        data, label, seq_len = sort_batch(data, label, seq_len.view(-1))
        y_pred = model(Variable(data, volatile=True).cuda(), seq_len)
        loss = loss_criterion(y_pred, Variable(label, volatile=True).cuda())
        test_loss += loss.data[0]
        pred_list.append(y_pred.data.cpu().numpy())
        gold_list.append(label.numpy())

    return np.concatenate(pred_list, axis=0), np.concatenate(gold_list, axis=0)
def one_fold(X_train, y_train, X_dev, y_dev):
    num_labels = NUM_CLASS
    vocab_size = 30000
    pad_len = 40
    batch_size = 64
    embedding_dim = 200
    hidden_dim = 600
    __use_unk = False

    word2id, id2word = build_vocab(X_train, vocab_size)
    train_data = DataSet(X_train, y_train, pad_len, word2id, num_labels,
                         use_unk=__use_unk)
    train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
    dev_data = DataSet(X_dev, y_dev, pad_len, word2id, num_labels,
                       use_unk=__use_unk)
    dev_loader = DataLoader(dev_data, batch_size=batch_size, shuffle=False)
    # test_data = TestDataSet(X_test, pad_len, word2id, num_labels,
    #                         use_unk=__use_unk)
    # test_loader = DataLoader(test_data, batch_size=batch_size,
    #                          shuffle=False)

    model = AttentionLSTMClassifier(embedding_dim, hidden_dim, vocab_size,
                                    word2id, num_labels, batch_size,
                                    use_att=True, soft_last=False)
    model.load_glove_embedding(id2word)
    model.cuda()

    es = EarlyStop(2)
    optimizer = optim.Adam(model.parameters(), lr=1e-5)
    loss_criterion = nn.MSELoss()
    for epoch in range(20):
        print('Epoch:', epoch, '===================================')
        train_loss = 0
        model.train()
        for i, (data, seq_len, label) in enumerate(train_loader):
            data, label, seq_len = sort_batch(data, label, seq_len.view(-1))
            y_pred = model(Variable(data).cuda(), seq_len)
            # roc_reward = roc_auc_score(label.numpy().argmax(axis=1),
            #                            y_pred.data.cpu().numpy()[:, 1])
            optimizer.zero_grad()
            loss = loss_criterion(y_pred, Variable(label).cuda())
            # * Variable(torch.FloatTensor([roc_reward])).cuda()
            loss.backward()
            optimizer.step()
            train_loss += loss.data[0] * batch_size

        pred_list = []
        gold_list = []
        test_loss = 0
        model.eval()
        for _, (_data, _seq_len, _label) in enumerate(dev_loader):
            data, label, seq_len = sort_batch(_data, _label,
                                              _seq_len.view(-1))
            y_pred = model(Variable(data, volatile=True).cuda(), seq_len)
            loss = loss_criterion(y_pred, Variable(label).cuda())
            test_loss += loss.data[0] * batch_size
            y_pred = y_pred.data.cpu().numpy()
            pred_list.append(y_pred)
            gold_list.append(label.numpy())

        # pred_list_2 = np.concatenate(pred_list, axis=0)[:, 1]
        pred_list = np.concatenate(pred_list, axis=0)
        gold_list = np.concatenate(gold_list, axis=0)
        # roc = roc_auc_score(gold_list, pred_list_2)
        # print('roc:', roc)
        # a = accuracy_score(gold_list, pred_list)
        # p = precision_score(gold_list, pred_list, average='binary')
        # r = recall_score(gold_list, pred_list, average='binary')
        # f1 = f1_score(gold_list, pred_list, average='binary')
        # print('accuracy:', a, 'precision_score:', p,
        #       'recall:', r, 'f1:', f1)
        print("Train Loss: ", train_loss / len(train_data),
              " Evaluation: ", test_loss / len(dev_data))
        es.new_loss(test_loss)
        # snapshot the model and predictions after each epoch
        # (restored below when early stopping fires)
        old_model = copy.deepcopy(model)
        old_pred_list = copy.deepcopy(pred_list)
        if es.if_stop():
            print('Early stopping: model is starting to overfit')
            del model
            model = old_model
            pred_list = old_pred_list
            torch.save(model.state_dict(),
                       open(os.path.join('checkpoint', 'cbet.model'), 'wb'))
            with open('checkpoint/some_data.pkl', 'wb') as f:
                pickle.dump([word2id, id2word], f)
            break

    return gold_list, pred_list, model, pad_len, word2id, num_labels
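# The final fold runner saves the model weights to checkpoint/cbet.model
# and pickles the vocabulary to checkpoint/some_data.pkl. A sketch of how
# that checkpoint could be restored later, assuming the same
# hyperparameters used above (NUM_CLASS is the project-wide label count;
# load_cbet_checkpoint is a hypothetical helper, not part of the project):
def load_cbet_checkpoint():
    with open('checkpoint/some_data.pkl', 'rb') as f:
        word2id, id2word = pickle.load(f)
    model = AttentionLSTMClassifier(200,        # embedding_dim
                                    600,        # hidden_dim
                                    30000,      # vocab_size
                                    word2id,
                                    NUM_CLASS,  # num_labels
                                    64,         # batch_size
                                    use_att=True, soft_last=False)
    model.load_state_dict(
        torch.load(os.path.join('checkpoint', 'cbet.model')))
    model.eval()
    return model, word2id, id2word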