# LR range test ("LR finder"): sweep the learning rate exponentially from
# args.start_lr to args.end_lr over the whole run and record the smoothed loss.
# Third-party imports; the project-local helpers load_data, MyDataset and
# CharacterLevelCNN are assumed to be importable from this repository.
import math

import matplotlib.pyplot as plt
import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader
from tqdm import tqdm


def run(args):
    batch_size = args.batch_size
    training_params = {
        "batch_size": batch_size,
        "shuffle": True,
        "num_workers": args.workers
    }

    texts, labels, number_of_classes, sample_weights = load_data(args)

    # only the training split is needed for the LR sweep
    train_texts, _, train_labels, _, _, _ = train_test_split(
        texts,
        labels,
        sample_weights,
        test_size=args.validation_split,
        random_state=42,
        stratify=labels)

    training_set = MyDataset(train_texts, train_labels, args)
    training_generator = DataLoader(training_set, **training_params)

    model = CharacterLevelCNN(args, number_of_classes)
    if torch.cuda.is_available():
        model.cuda()
    model.train()

    criterion = nn.CrossEntropyLoss()

    if args.optimizer == 'sgd':
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=args.start_lr,
                                    momentum=0.9)
    elif args.optimizer == 'adam':
        optimizer = torch.optim.Adam(model.parameters(), lr=args.start_lr)

    start_lr = args.start_lr
    end_lr = args.end_lr
    lr_find_epochs = args.epochs
    smoothing = args.smoothing

    # multiplicative factor that grows the LR exponentially from start_lr to
    # end_lr over lr_find_epochs * len(training_generator) steps
    def lr_lambda(x):
        return math.exp(
            x * math.log(end_lr / start_lr) /
            (lr_find_epochs * len(training_generator)))

    scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda)

    losses = []
    learning_rates = []

    for epoch in range(lr_find_epochs):
        print(f'[epoch {epoch + 1} / {lr_find_epochs}]')
        progress_bar = tqdm(enumerate(training_generator),
                            total=len(training_generator))
        # `iteration` avoids shadowing the builtin `iter`
        for iteration, batch in progress_bar:
            features, labels = batch
            if torch.cuda.is_available():
                features = features.cuda()
                labels = labels.cuda()

            optimizer.zero_grad()
            predictions = model(features)
            loss = criterion(predictions, labels)
            loss.backward()
            optimizer.step()
            scheduler.step()

            # record the LR used for this step and the exponentially smoothed loss
            lr = optimizer.state_dict()["param_groups"][0]["lr"]
            learning_rates.append(lr)
            if iteration == 0:
                losses.append(loss.item())
            else:
                smoothed = smoothing * loss.item() + (1 - smoothing) * losses[-1]
                losses.append(smoothed)

    plt.semilogx(learning_rates, losses)
    plt.savefig('./plots/losses_vs_lr.png')
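# A minimal, standalone sketch (not part of the original script) showing how the
# exponential schedule above behaves: with N total steps, the LR at step x is
# start_lr * exp(x * ln(end_lr / start_lr) / N), moving geometrically from
# start_lr at x=0 to end_lr at x=N. The values below are illustrative
# assumptions, not the script's defaults.
import math

start_lr, end_lr, total_steps = 1e-5, 1.0, 1000
lrs = [start_lr * math.exp(x * math.log(end_lr / start_lr) / total_steps)
       for x in range(total_steps + 1)]
assert abs(lrs[0] - start_lr) < 1e-12 and abs(lrs[-1] - end_lr) < 1e-9
# The usual reading of losses_vs_lr.png is to pick the LR range where the
# smoothed loss is still clearly decreasing.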
# Full training entry point: TensorBoard logging, optional weighted sampling,
# class-weighted CrossEntropy or focal loss, SGD/Adam, cyclical or step LR
# scheduling, checkpointing on the best validation F1, and early stopping.
# Third-party imports; the project-local helpers load_data, MyDataset,
# CharacterLevelCNN, FocalLoss, utils.cyclical_lr, train and evaluate are
# assumed to be importable from this repository.
import os
import shutil
from collections import Counter
from datetime import datetime

import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split
from tensorboardX import SummaryWriter  # or torch.utils.tensorboard, depending on the project setup
from torch.utils.data import DataLoader, WeightedRandomSampler


def run(args, both_cases=False):
    # optionally wipe previous runs, then create a timestamped log directory
    if args.flush_history == 1:
        objects = os.listdir(args.log_path)
        for f in objects:
            if os.path.isdir(args.log_path + f):
                shutil.rmtree(args.log_path + f)

    now = datetime.now()
    logdir = args.log_path + now.strftime("%Y%m%d-%H%M%S") + "/"
    os.makedirs(logdir)
    log_file = logdir + 'log.txt'
    writer = SummaryWriter(logdir)

    batch_size = args.batch_size

    training_params = {
        "batch_size": batch_size,
        "shuffle": True,
        "num_workers": args.workers,
        "drop_last": True
    }

    validation_params = {
        "batch_size": batch_size,
        "shuffle": False,
        "num_workers": args.workers,
        "drop_last": True
    }

    texts, labels, number_of_classes, sample_weights = load_data(args)

    class_names = sorted(list(set(labels)))
    class_names = [str(class_name) for class_name in class_names]

    train_texts, val_texts, train_labels, val_labels, train_sample_weights, _ = train_test_split(
        texts,
        labels,
        sample_weights,
        test_size=args.validation_split,
        random_state=42,
        stratify=labels)

    training_set = MyDataset(train_texts, train_labels, args)
    validation_set = MyDataset(val_texts, val_labels, args)

    # a WeightedRandomSampler replaces shuffling when sample weights are used
    if bool(args.use_sampler):
        train_sample_weights = torch.from_numpy(train_sample_weights)
        sampler = WeightedRandomSampler(
            train_sample_weights.type('torch.DoubleTensor'),
            len(train_sample_weights))
        training_params['sampler'] = sampler
        training_params['shuffle'] = False

    training_generator = DataLoader(training_set, **training_params)
    validation_generator = DataLoader(validation_set, **validation_params)

    model = CharacterLevelCNN(args, number_of_classes)
    if torch.cuda.is_available():
        model.cuda()

    if not bool(args.focal_loss):
        if bool(args.class_weights):
            # inverse-frequency class weights, scaled by the majority class count
            class_counts = dict(Counter(train_labels))
            m = max(class_counts.values())
            for c in class_counts:
                class_counts[c] = m / class_counts[c]
            weights = []
            for k in sorted(class_counts.keys()):
                weights.append(class_counts[k])

            weights = torch.Tensor(weights)
            if torch.cuda.is_available():
                weights = weights.cuda()
            print(f'passing weights to CrossEntropyLoss : {weights}')
            criterion = nn.CrossEntropyLoss(weight=weights)
        else:
            criterion = nn.CrossEntropyLoss()
    else:
        if args.alpha is None:
            criterion = FocalLoss(gamma=args.gamma, alpha=None)
        else:
            criterion = FocalLoss(gamma=args.gamma,
                                  alpha=[args.alpha] * number_of_classes)

    if args.optimizer == 'sgd':
        if args.scheduler == 'clr':
            # with the cyclical schedule, the LambdaLR multiplier returns the
            # absolute LR, so the optimizer's base LR is set to 1
            optimizer = torch.optim.SGD(model.parameters(),
                                        lr=1,
                                        momentum=0.9,
                                        weight_decay=0.00001)
        else:
            optimizer = torch.optim.SGD(model.parameters(),
                                        lr=args.learning_rate,
                                        momentum=0.9)
    elif args.optimizer == 'adam':
        optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)

    best_f1 = 0
    best_epoch = 0

    if args.scheduler == 'clr':
        stepsize = int(args.stepsize * len(training_generator))
        clr = utils.cyclical_lr(stepsize, args.min_lr, args.max_lr)
        scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, [clr])
    else:
        scheduler = None

    for epoch in range(args.epochs):
        training_loss, training_accuracy, train_f1 = train(
            model, training_generator, optimizer, criterion, epoch, writer,
            log_file, scheduler, class_names, args, args.log_every)

        validation_loss, validation_accuracy, validation_f1 = evaluate(
            model, validation_generator, criterion, epoch, writer, log_file,
            args.log_every)

        print(
            '[Epoch: {} / {}]\ttrain_loss: {:.4f} \ttrain_acc: {:.4f} \tval_loss: {:.4f} \tval_acc: {:.4f}'
            .format(epoch + 1, args.epochs, training_loss, training_accuracy,
                    validation_loss, validation_accuracy))
        print("=" * 50)

        # learning rate scheduling: halve the SGD learning rate every 3 epochs
        if args.scheduler == 'step':
            if args.optimizer == 'sgd' and ((epoch + 1) % 3 == 0) and epoch > 0:
                current_lr = optimizer.state_dict()['param_groups'][0]['lr']
                current_lr /= 2
                print('Decreasing learning rate to {0}'.format(current_lr))
                for param_group in optimizer.param_groups:
                    param_group['lr'] = current_lr

        # model checkpoint: keep the weights with the best validation F1
        if validation_f1 > best_f1:
            best_f1 = validation_f1
            best_epoch = epoch
            if args.checkpoint == 1:
                torch.save(
                    model.state_dict(), args.output +
                    'model_{}_epoch_{}_maxlen_{}_lr_{}_loss_{}_acc_{}_f1_{}.pth'
                    .format(args.model_name, epoch, args.max_length,
                            optimizer.state_dict()['param_groups'][0]['lr'],
                            round(validation_loss, 4),
                            round(validation_accuracy, 4),
                            round(validation_f1, 4)))

        # early stopping once validation F1 has not improved for `patience` epochs
        if bool(args.early_stopping):
            if epoch - best_epoch > args.patience > 0:
                print(
                    "Stop training at epoch {}. The best F1 achieved is {} at epoch {}"
                    .format(epoch, best_f1, best_epoch))
                break
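# utils.cyclical_lr is not shown in this snippet. Below is a minimal sketch of a
# common triangular cyclical-LR callable that would be consistent with how it is
# consumed above (the optimizer's base LR is 1, so LambdaLR's multiplier is the
# absolute learning rate). This is an assumption about the helper, not its
# actual implementation.
import math


def cyclical_lr(stepsize, min_lr, max_lr):
    def relative(it, stepsize):
        # position inside the current half-cycle, in [0, 1]
        cycle = math.floor(1 + it / (2 * stepsize))
        x = abs(it / stepsize - 2 * cycle + 1)
        return max(0, 1 - x)

    # absolute LR for iteration `it`: oscillates between min_lr and max_lr,
    # rising for `stepsize` iterations and falling for the next `stepsize`
    return lambda it: min_lr + (max_lr - min_lr) * relative(it, stepsize)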
# Simpler variant of the training entry point: a random train/validation split of
# a single MyDataset, CrossEntropyLoss, optional step LR decay, and checkpointing
# plus early stopping on the best validation loss.
# Third-party imports; the project-local helpers MyDataset, CharacterLevelCNN
# (from src.scc_model), train and evaluate are assumed to be importable.
import os
import shutil

import torch
import torch.nn as nn
from tensorboardX import SummaryWriter
from torch.utils.data import DataLoader


def run(args, both_cases=False):
    print("3 : -> Entered run()")

    # log generation for TensorBoardX
    log_path = args.log_path
    if os.path.isdir(log_path):
        shutil.rmtree(log_path)
    os.makedirs(log_path)

    if not os.path.exists(args.output):
        os.makedirs(args.output)

    writer = SummaryWriter(log_path)

    batch_size = args.batch_size

    training_params = {
        "batch_size": batch_size,
        "shuffle": True,
        "num_workers": args.workers
    }

    validation_params = {
        "batch_size": batch_size,
        "shuffle": False,
        "num_workers": args.workers
    }

    full_dataset = MyDataset(args)

    # train split size: args.validation_split is used here as the train
    # fraction (80% by default)
    train_size = int(args.validation_split * len(full_dataset))
    print("4 :-> ", train_size)

    # validation_size = len(full_dataset) - train_size
    validation_size = len(full_dataset) - train_size
    print("5 :-> ", validation_size)

    # torch utility to split the data randomly into training and validation sets
    training_set, validation_set = torch.utils.data.random_split(
        full_dataset, [train_size, validation_size])

    training_generator = DataLoader(training_set, **training_params)
    print("6 :-> ", training_generator)
    validation_generator = DataLoader(validation_set, **validation_params)
    print("7 :-> ", validation_generator)

    # pass args to the src.scc_model CharacterLevelCNN
    model = CharacterLevelCNN(args)
    print("8 :-> ", model)
    if torch.cuda.is_available():
        model.cuda()

    criterion = nn.CrossEntropyLoss()
    print("9 :-> ", criterion)

    if args.optimizer == 'sgd':
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=args.learning_rate,
                                    momentum=0.9)
    elif args.optimizer == 'adam':
        optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)

    best_loss = 1e10
    best_epoch = 0

    for epoch in range(args.epochs):
        training_loss, training_accuracy = train(model, training_generator,
                                                 optimizer, criterion, epoch,
                                                 writer)
        validation_loss, validation_accuracy = evaluate(
            model, validation_generator, criterion, epoch, writer)

        print(
            '[Epoch: {} / {}]\ttrain_loss: {:.4f} \ttrain_acc: {:.4f} \tval_loss: {:.4f} \tval_acc: {:.4f}'
            .format(epoch + 1, args.epochs, training_loss, training_accuracy,
                    validation_loss, validation_accuracy))
        print("=" * 50)

        # learning rate scheduling: halve the SGD learning rate every `schedule` epochs
        if args.schedule != 0:
            if args.optimizer == 'sgd' and epoch % args.schedule == 0 and epoch > 0:
                current_lr = optimizer.state_dict()['param_groups'][0]['lr']
                current_lr /= 2
                print('Decreasing learning rate to {0}'.format(current_lr))
                for param_group in optimizer.param_groups:
                    param_group['lr'] = current_lr

        # checkpoint on the best validation loss, then early stopping
        if validation_loss < best_loss:
            best_loss = validation_loss
            best_epoch = epoch
            if args.checkpoint == 1:
                torch.save(
                    model, args.output +
                    'char_cnn_epoch_{}_{}_{}_loss_{}_acc_{}.pth'.format(
                        args.model_name, epoch,
                        optimizer.state_dict()['param_groups'][0]['lr'],
                        round(validation_loss, 4),
                        round(validation_accuracy, 4)))

        if epoch - best_epoch > args.patience > 0:
            print(
                "Stop training at epoch {}. The lowest loss achieved is {} at epoch {}"
                .format(epoch, best_loss, best_epoch))
            break
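# A minimal sketch (not from the original repository) of how this run() might be
# driven from the command line. The flag names mirror the attributes read above;
# the default values are illustrative assumptions, and CharacterLevelCNN will
# typically require additional model flags that are not listed here.
import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser('Character-level CNN training')
    parser.add_argument('--batch_size', type=int, default=128)
    parser.add_argument('--workers', type=int, default=1)
    parser.add_argument('--validation_split', type=float, default=0.8)  # used as the train fraction above
    parser.add_argument('--optimizer', choices=['sgd', 'adam'], default='sgd')
    parser.add_argument('--learning_rate', type=float, default=0.01)
    parser.add_argument('--epochs', type=int, default=10)
    parser.add_argument('--schedule', type=int, default=3)  # halve the LR every `schedule` epochs; 0 disables
    parser.add_argument('--patience', type=int, default=3)
    parser.add_argument('--checkpoint', type=int, choices=[0, 1], default=1)
    parser.add_argument('--model_name', type=str, default='char_cnn')
    parser.add_argument('--output', type=str, default='./models/')
    parser.add_argument('--log_path', type=str, default='./logs/')
    args = parser.parse_args()
    run(args)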
# K-fold training loop: out-of-fold predictions on the training set and averaged
# predictions on the test set. SEED, n_splits, n_epochs, batch_size, args and
# test_loader are assumed to be module-level globals; seed_everything, MyDataset,
# train, evaluate, test and checkpoint are project-local helpers.
import copy
import time

import numpy as np
import torch
from sklearn.model_selection import StratifiedKFold


def run(x_train, y_train, features, x_test, model_obj, feats=False, clip=True):
    seed_everything(SEED)

    avg_losses_f = []
    avg_val_losses_f = []

    # matrix for the out-of-fold predictions
    train_preds = np.zeros((len(x_train)))
    # matrix for the predictions on the test set
    test_preds = np.zeros((len(x_test)))

    splits = list(
        StratifiedKFold(n_splits=n_splits, shuffle=True,
                        random_state=SEED).split(x_train, y_train))

    for i, (train_idx, valid_idx) in enumerate(splits):
        seed_everything(i * 1000 + i)

        x_train = np.array(x_train)
        y_train = np.array(y_train)
        if feats:
            features = np.array(features)

        x_train_fold = torch.tensor(x_train[train_idx.astype(int)],
                                    dtype=torch.long).cuda()
        y_train_fold = torch.tensor(y_train[train_idx.astype(int), np.newaxis],
                                    dtype=torch.float32).cuda()

        # engineered-feature slices are only filled when feats=True; the
        # placeholders keep the calls below valid when they are unused
        kfold_X_features = kfold_X_valid_features = test_features = None
        if feats:
            kfold_X_features = features[train_idx.astype(int)]
            kfold_X_valid_features = features[valid_idx.astype(int)]
            test_features = features[valid_idx.astype(int)]

        x_val_fold = torch.tensor(x_train[valid_idx.astype(int)],
                                  dtype=torch.long).cuda()
        y_val_fold = torch.tensor(y_train[valid_idx.astype(int), np.newaxis],
                                  dtype=torch.float32).cuda()

        model = copy.deepcopy(model_obj)

        if args.snapshot is not None:
            print('\nLoading model from {}...'.format(args.snapshot))
            model.load_state_dict(torch.load(args.snapshot))

        model.cuda()

        loss_fn = torch.nn.BCEWithLogitsLoss(reduction='sum')
        optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad,
                                            model.parameters()),
                                     lr=args.lr)

        # no learning rate scheduler is used in this script
        scheduler = False

        # named train_dataset / valid_dataset so they do not shadow the train() helper
        train_dataset = MyDataset(
            torch.utils.data.TensorDataset(x_train_fold, y_train_fold))
        valid_dataset = MyDataset(
            torch.utils.data.TensorDataset(x_val_fold, y_val_fold))

        train_loader = torch.utils.data.DataLoader(train_dataset,
                                                   batch_size=batch_size,
                                                   shuffle=True)
        valid_loader = torch.utils.data.DataLoader(valid_dataset,
                                                   batch_size=batch_size,
                                                   shuffle=False)

        print(f'Fold {i + 1}')

        steps = 0
        best_step = 0
        best_val_loss = float('inf')

        for epoch in range(n_epochs):
            start_time = time.time()

            avg_loss = train(model, train_loader, optimizer, loss_fn, scheduler,
                             clip, feats, kfold_X_features)

            valid_preds_fold = np.zeros((x_val_fold.size(0)))
            test_preds_fold = np.zeros((len(x_test)))

            avg_val_loss, valid_preds_fold = evaluate(model, valid_loader,
                                                      loss_fn, valid_preds_fold,
                                                      feats,
                                                      kfold_X_valid_features)

            steps += 1
            elapsed_time = time.time() - start_time
            print(
                'Epoch {}/{} \t loss={:.4f} \t val_loss={:.4f} \t time={:.2f}s'
                .format(epoch + 1, n_epochs, avg_loss, avg_val_loss,
                        elapsed_time))

            if avg_val_loss < best_val_loss:
                best_val_loss = avg_val_loss
                best_step = steps
                if args.save_best:
                    checkpoint(model, i + 1, epoch)
            else:
                if steps - best_step >= args.early_stop:
                    print('early stop by {} steps.'.format(steps))
                    # stop training this fold once validation loss has stalled
                    break

        avg_losses_f.append(avg_loss)
        avg_val_losses_f.append(avg_val_loss)

        # predict all samples in the test set batch per batch
        test_preds = test(model, test_loader, train_preds, valid_idx,
                          test_preds, valid_preds_fold, test_preds_fold,
                          splits, feats, test_features)

    print('All \t loss={:.4f} \t val_loss={:.4f} \t '.format(
        np.average(avg_losses_f), np.average(avg_val_losses_f)))

    return train_preds, test_preds
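# seed_everything is not shown in this snippet. Below is a minimal sketch of a
# typical implementation consistent with how it is called above (seeding Python,
# NumPy and PyTorch for reproducibility); this is an assumption, not the
# project's actual helper.
import os
import random

import numpy as np
import torch


def seed_everything(seed=1234):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True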