def main():
    epochs = 5
    num_class = 10
    output_path = './output/catalyst'

    # Use if you want to fix seed
    # catalyst.utils.set_global_seed(42)
    # catalyst.utils.prepare_cudnn(deterministic=True)

    model = get_model()
    train_loader, val_loader = get_loaders()
    loaders = {"train": train_loader, "valid": val_loader}
    optimizer, lr_scheduler = get_optimizer(model=model)
    criterion = get_criterion()

    runner = SupervisedRunner(device=catalyst.utils.get_device())
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=lr_scheduler,
        loaders=loaders,
        logdir=output_path,
        callbacks=[AccuracyCallback(num_classes=num_class, accuracy_args=[1])],
        num_epochs=epochs,
        main_metric="accuracy01",
        minimize_metric=False,
        fp16=None,
        verbose=True
    )
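# The snippet above relies on helper factories (get_model, get_loaders, get_optimizer,
# get_criterion) defined elsewhere in the project. Below is a minimal sketch of what they
# might look like for a 10-class image classifier; the CIFAR-10 dataset, the torchvision
# resnet18 backbone, and all hyperparameters here are illustrative assumptions, not the
# original author's code.
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision import datasets, models, transforms


def get_model(num_classes: int = 10) -> nn.Module:
    # small backbone with a fresh classification head
    return models.resnet18(pretrained=False, num_classes=num_classes)


def get_loaders(batch_size: int = 64):
    transform = transforms.ToTensor()
    train_ds = datasets.CIFAR10(".", train=True, transform=transform, download=True)
    valid_ds = datasets.CIFAR10(".", train=False, transform=transform, download=True)
    train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
    valid_loader = DataLoader(valid_ds, batch_size=batch_size, shuffle=False)
    return train_loader, valid_loader


def get_optimizer(model: nn.Module, lr: float = 1e-3):
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[3, 8], gamma=0.3)
    return optimizer, scheduler


def get_criterion() -> nn.Module:
    return nn.CrossEntropyLoss()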
def train(
    in_csv: str,
    in_dir: str,
    model: str = 'resnet18',
    fold: int = None,
    n_epochs: int = 30,
    image_size: int = 224,
    augmentation: str = 'medium',
    learning_rate: float = 3e-3,
    n_milestones: int = 5,
    batch_size: int = 256,
    n_workers: int = 4,
    fast: bool = False,
    logdir: str = '.',
    verbose: bool = False
):
    model = get_model(model=model)
    loss = criterion.FocalLossMultiClass()  # CrossEntropyLoss
    lr_scaled = learning_rate * (batch_size / 256)  # lr linear scaling
    optimizer = torch.optim.Adam(model.parameters(), lr=lr_scaled)
    scheduler = schedulers.MultiStepLR(
        optimizer, milestones=[5, 10, 20, 30, 40], gamma=0.3
    )

    runner = SupervisedRunner()
    runner.train(
        model=model,
        criterion=loss,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=get_dataloaders(
            in_csv=in_csv,
            in_dir=in_dir,
            stages=['train', 'valid'],
            fold=fold,
            batch_size=batch_size,
            n_workers=n_workers,
            image_size=(image_size, image_size),
            augmentation=augmentation,
            fast=fast
        ),
        callbacks=[
            AccuracyCallback(accuracy_args=[1]),
            BinaryAUCCallback()
        ],
        logdir=logdir,
        num_epochs=n_epochs,
        verbose=verbose
    )
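# The `lr_scaled` line above applies the linear scaling rule: the learning rate grows in
# proportion to the batch size, with 256 as the reference. A small worked example (the
# alternative batch sizes are illustrative, not from the original script):
#   batch_size = 256 -> lr_scaled = 3e-3 * (256 / 256) = 3e-3
#   batch_size = 512 -> lr_scaled = 3e-3 * (512 / 256) = 6e-3
#   batch_size = 128 -> lr_scaled = 3e-3 * (128 / 256) = 1.5e-3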
def get_callbacks(class_names):
    num_classes = len(class_names)
    return [
        AccuracyCallback(num_classes=num_classes),
        AUCCallback(
            num_classes=num_classes,
            input_key="targets_one_hot",
            class_names=class_names
        ),
        F1ScoreCallback(
            input_key="targets_one_hot",
            activation="Softmax"
        )
    ]
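# A hedged usage sketch for the factory above: the returned list is passed straight to
# runner.train. The model, loaders, criterion, optimizer and class names below are
# placeholders for illustration, not part of the original snippet.
# runner = SupervisedRunner()
# runner.train(
#     model=model,
#     criterion=criterion,
#     optimizer=optimizer,
#     loaders=loaders,
#     callbacks=get_callbacks(class_names=["cat", "dog", "bird"]),
#     logdir="./logdir",
#     num_epochs=10,
# )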
def infer(
    config_path,
    log_dir
):
    """
    Inference:
        1. loaders
        2. model
    """
    # querying params from experiment config
    batch_size = 116

    test_dataset = LipreadingDataset("test")
    loaders = {
        "infer": DataLoader(
            test_dataset,
            batch_size=batch_size,
            shuffle=True,
            num_workers=0,
            drop_last=False,
        )
    }

    model = LipNext()
    device = "cuda" if torch.cuda.is_available() else "cpu"
    runner = SupervisedRunner(device=device)

    runner.infer(
        model=model,
        loaders=loaders,
        callbacks=[
            AccuracyCallback(accuracy_args=[1, 3]),
            InferenceCallback(),
            CheckpointCallbackV2(
                config_path=config_path,
                resume=("/home/dmitry.klimenkov/Documents/projects/visper_pytorch/logdir"
                        "/Mobi-VSR-5W-mixed_aligned_patience5_sometests/checkpoints/train.0.35.8553.pth"))
            # NegativeMiningCallback()
        ],
        state_kwargs={"log_dir": log_dir},
        check=True
    )
def main(args):
    logdir = "./logdir"
    num_epochs = 42

    # detect gpu
    device = utils.get_device()
    print(f"device: {device}")

    # dataset
    trainset = ImageNetK(
        '/run/media/mooziisp/仓库/datasets/Kaggle-ILSVRC/ILSVRC',
        split='train',
        transform=transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.RandomVerticalFlip(),
            transforms.ToTensor()
        ]))
    trainloader = torch.utils.data.DataLoader(trainset,
                                              batch_size=64,
                                              shuffle=True,
                                              num_workers=2,
                                              pin_memory=True)
    loaders = {"train": trainloader}

    # define net
    net = models.resnet18(pretrained=False, num_classes=1000)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=1e-4)

    # trainer
    runner = SupervisedRunner(device=device)
    runner.train(model=net,
                 criterion=criterion,
                 optimizer=optimizer,
                 loaders=loaders,
                 logdir=logdir,
                 callbacks=[AccuracyCallback(num_classes=1000)],
                 num_epochs=num_epochs,
                 verbose=True)
def get_callbacks(self):
    callbacks_list = [
        PrecisionRecallF1ScoreCallback(num_classes=4),  # DiceCallback(),
        EarlyStoppingCallback(**self.cb_params["earlystop"]),
        AccuracyCallback(**self.cb_params["accuracy"]),
    ]
    ckpoint_params = self.cb_params["checkpoint_params"]
    # hacky way to say no checkpoint callback but eh what the heck
    if ckpoint_params["checkpoint_path"] is not None:
        mode = ckpoint_params["mode"].lower()
        if mode == "full":
            print("Stateful loading...")
            ckpoint_p = Path(ckpoint_params["checkpoint_path"])
            fname = ckpoint_p.name
            # everything in the path besides the base file name
            resume_dir = str(ckpoint_p.parents[0])
            print(f"Loading {fname} from {resume_dir}."
                  f"\nCheckpoints will also be saved in {resume_dir}.")
            # adding the checkpoint callback
            callbacks_list = callbacks_list + [
                CheckpointCallback(resume=fname, resume_dir=resume_dir),
            ]
        elif mode == "model_only":
            print("Loading weights into model...")
            self.model = load_weights_train(ckpoint_params["checkpoint_path"], self.model)
    return callbacks_list
loaders = OrderedDict()
loaders["train"] = train_loader
loaders["valid"] = val_loader

runner = dl.SupervisedRunner(device=tu.device,
                             input_key="image",
                             input_target_key="label",
                             output_key="logits")

callbacks = [
    CriterionCallback(input_key="label", output_key="logits", prefix="loss"),
    AccuracyCallback(input_key="label", output_key="logits", prefix="acc", activation="Sigmoid"),
    OptimizerCallback(accumulation_steps=2),
    # MixupCallback(alpha=0.3, input_key="label", output_key="logits", fields=("image", ))
]

if TRAINING:
    runner.train(model=model,
                 criterion=nn.CrossEntropyLoss(),
                 optimizer=optimizer,
                 scheduler=scheduler,
                 loaders=loaders,
                 logdir=LOGDIR,
                 num_epochs=EPOCHS,
                 fp16=tu.fp16_params,
                 callbacks=callbacks,
                 verbose=True)
                          shuffle=True, drop_last=True)

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
model = GenderModel()
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
runner = SupervisedRunner(device=device)
loaders = {'train': train_loader}
logdir = str(
    DIR_DATA_LOGS / 'audio') + '/gender/' + datetime.now().strftime("%Y%m%d-%H%M%S")

callbacks = [
    AccuracyCallback(),
    F1ScoreCallback(),
    ConfusionMatrixCallback(num_classes=2, class_names=['female', 'male'])
]

runner.train(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    loaders=loaders,
    callbacks=callbacks,
    logdir=logdir,
    num_epochs=epochs,
    # verbose=True
)
train_iter, val_iter, test_iter = data.BucketIterator.splits(
    (train, val, test), batch_sizes=(64, 64, 64), device=device, repeat=False, sort=False)

train_loader = BucketIteratorWrapper(train_iter)
valid_loader = BucketIteratorWrapper(val_iter)
loaders = {"train": train_loader, "valid": valid_loader}

TEXT.build_vocab(train, min_freq=2)
LABELS.build_vocab(train)

model = RNN(len(TEXT.vocab.stoi) + 1, num_layers=2, output_size=4)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

runner = SupervisedRunner()
runner.train(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    loaders=loaders,
    logdir="./logdir",
    callbacks=[AccuracyCallback(num_classes=4, accuracy_args=[1])],
    num_epochs=10,
    verbose=True,
)
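# BucketIteratorWrapper is used above but not defined in the snippet. A minimal sketch of
# such a wrapper is below: it adapts torchtext's BucketIterator batches, which expose the
# dataset fields as attributes, into the (features, targets) pairs Catalyst's
# SupervisedRunner expects. The attribute names `text` and `label` and the class body are
# assumptions for illustration, not the original author's implementation.
class BucketIteratorWrapper:
    def __init__(self, iterator):
        self.iterator = iterator

    def __iter__(self):
        for batch in self.iterator:
            # yield model input and target as a pair
            yield batch.text, batch.label

    def __len__(self):
        return len(self.iterator)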
def main():
    # setup config
    cfg = config()
    cfg['device'] = torch.device(
        "cuda" if torch.cuda.is_available() else "cpu")
    timestr = time.strftime("%Y%m%d-%H%M%S")
    cfg['logdir'] += f"{cfg['arch']}_"
    cfg['logdir'] += f"{cfg['exp_idx']}_"
    cfg['logdir'] += f"{cfg['input_size']}_"
    cfg['logdir'] += f"{cfg['criterion']}_"
    cfg['logdir'] += f"{cfg['optimizer']}_"
    cfg['logdir'] += f"split{cfg['data_split']}_"
    cfg['logdir'] += timestr
    set_global_seed(cfg['random_state'])
    pprint(cfg)

    # load data
    train_df = pd.read_csv(cfg['train_csv_path'])
    test_df = pd.read_csv(cfg['test_csv_path'])
    print(len(train_df), len(test_df))
    train_img_weights = compute_dataset_weights(train_df)

    train_transforms, test_transforms = get_transforms(cfg['input_size'])
    train_dataset = LeafDataset(
        img_root=cfg['img_root'],
        df=train_df,
        img_transforms=train_transforms,
        is_train=True,
    )
    test_dataset = LeafDataset(
        img_root=cfg['img_root'],
        df=test_df,
        img_transforms=test_transforms,
        is_train=False,
    )
    print(
        f"Training set size:{len(train_dataset)}, Test set size:{len(test_dataset)}")

    # prepare train and test loader
    if cfg['sampling'] == 'weighted':
        # image weight based on statistics
        train_img_weights = compute_dataset_weights(train_df)
        # weighted sampler
        weighted_sampler = WeightedRandomSampler(
            weights=train_img_weights, num_samples=len(train_img_weights), replacement=False)
        # batch sampler from weighted sampler
        batch_sampler = BatchSampler(
            weighted_sampler, batch_size=cfg['batch_size'], drop_last=True)
        # train loader
        train_loader = DataLoader(
            train_dataset, batch_sampler=batch_sampler, num_workers=4)
    elif cfg['sampling'] == 'normal':
        train_loader = DataLoader(
            train_dataset, cfg['batch_size'], shuffle=True, num_workers=2)
    test_loader = DataLoader(
        test_dataset, cfg['test_batch_size'], shuffle=False, num_workers=1, drop_last=True)

    loaders = {
        'train': train_loader,
        'valid': test_loader
    }

    # model setup
    model = timm.create_model(model_name=cfg['arch'], num_classes=len(
        cfg['class_names']), drop_rate=0.5, pretrained=True)
    model.train()

    # loss
    if cfg['criterion'] == 'label_smooth':
        criterion = LabelSmoothingCrossEntropy()
    elif cfg['criterion'] == 'cross_entropy':
        criterion = nn.CrossEntropyLoss()

    # optimizer
    if cfg['optimizer'] == 'adam':
        optimizer = torch.optim.Adam(
            model.parameters(), lr=cfg['lr'], weight_decay=cfg['wd'])
    elif cfg['optimizer'] == 'adamw':
        optimizer = AdamW(
            model.parameters(), lr=cfg['lr'], weight_decay=cfg['wd'])
    elif cfg['optimizer'] == 'radam':
        optimizer = RAdam(
            model.parameters(), lr=cfg['lr'], weight_decay=cfg['wd'])

    # learning schedule
    if cfg['lr_schedule'] == 'reduce_plateau':
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer, factor=0.5, patience=4)

    # trainer
    runner = SupervisedRunner(device=cfg['device'])
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=loaders,
        callbacks=[
            AccuracyCallback(
                num_classes=len(cfg['class_names']),
                threshold=0.5,
                activation="Softmax"
            ),
        ],
        logdir=cfg['logdir'],
        num_epochs=cfg['num_epochs'],
        verbose=cfg['verbose'],
        # set this true to run for 3 epochs only
        check=cfg['check'],
    )
for param in resnet.layer4.parameters():
    param.requires_grad = True

loss_fn = nn.CrossEntropyLoss()
opt = torch.optim.SGD(resnet.parameters(), lr=0.01, momentum=0.9)
logdir = '/tmp/protein/logs/'
runner = SupervisedRunner()
sched = OneCycleLR(opt,
                   num_steps=epochs * len(loaders['train']),
                   warmup_fraction=0.3,
                   lr_range=(0.1, 0.0001))

runner.train(model=resnet,
             criterion=loss_fn,
             optimizer=opt,
             loaders=loaders,
             logdir=logdir,
             num_epochs=epochs,
             scheduler=sched,
             callbacks=[
                 AccuracyCallback(num_classes=num_classes),
                 F1ScoreCallback(input_key="targets_one_hot", activation="Softmax")
             ],
             verbose=True)

print('Saving the trained model')
basedir = os.path.expanduser('~/data/protein/tmp/models')
os.makedirs(basedir, exist_ok=True)
torch.save(resnet, os.path.join(basedir, 'resnet50_simple.pth'))
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("-acc", "--accumulation-steps", type=int, default=1, help="Number of batches to process")
    parser.add_argument("--seed", type=int, default=42, help="Random seed")
    parser.add_argument("-v", "--verbose", action="store_true")
    parser.add_argument("--fast", action="store_true")
    parser.add_argument("-dd", "--data-dir", type=str, required=True, help="Data directory for INRIA satellite dataset")
    parser.add_argument("-m", "--model", type=str, default="resnet34_fpncat128", help="")
    parser.add_argument("-b", "--batch-size", type=int, default=8, help="Batch Size during training, e.g. -b 64")
    parser.add_argument("-e", "--epochs", type=int, default=100, help="Epoch to run")
    # parser.add_argument('-es', '--early-stopping', type=int, default=None, help='Maximum number of epochs without improvement')
    # parser.add_argument('-fe', '--freeze-encoder', type=int, default=0, help='Freeze encoder parameters for N epochs')
    # parser.add_argument('-ft', '--fine-tune', action='store_true')
    parser.add_argument("-lr", "--learning-rate", type=float, default=1e-3, help="Initial learning rate")
    parser.add_argument(
        "--disaster-type-loss",
        type=str,
        default=None,  # [["ce", 1.0]],
        action="append",
        nargs="+",
        help="Criterion for classifying disaster type",
    )
    parser.add_argument(
        "--damage-type-loss",
        type=str,
        default=None,  # [["bce", 1.0]],
        action="append",
        nargs="+",
        help="Criterion for classifying presence of building with particular damage type",
    )
    parser.add_argument("-l", "--criterion", type=str, default=None, action="append", nargs="+", help="Criterion")
    parser.add_argument("--mask4", type=str, default=None, action="append", nargs="+", help="Criterion for mask with stride 4")
    parser.add_argument("--mask8", type=str, default=None, action="append", nargs="+", help="Criterion for mask with stride 8")
    parser.add_argument("--mask16", type=str, default=None, action="append", nargs="+", help="Criterion for mask with stride 16")
    parser.add_argument("--mask32", type=str, default=None, action="append", nargs="+", help="Criterion for mask with stride 32")
    parser.add_argument("--embedding", type=str, default=None)
    parser.add_argument("-o", "--optimizer", default="RAdam", help="Name of the optimizer")
    parser.add_argument("-c", "--checkpoint", type=str, default=None, help="Checkpoint filename to use as initial model weights")
    parser.add_argument("-w", "--workers", default=8, type=int, help="Num workers")
    parser.add_argument("-a", "--augmentations", default="safe", type=str, help="Level of image augmentations")
    parser.add_argument("--transfer", default=None, type=str, help="")
    parser.add_argument("--fp16", action="store_true")
    parser.add_argument("--size", default=512, type=int)
    parser.add_argument("--fold", default=0, type=int)
    parser.add_argument("-s", "--scheduler", default="multistep", type=str, help="")
    parser.add_argument("-x", "--experiment", default=None, type=str, help="")
    parser.add_argument("-d", "--dropout", default=0.0, type=float, help="Dropout before head layer")
    parser.add_argument("-pl", "--pseudolabeling", type=str, required=True)
    parser.add_argument("-wd", "--weight-decay", default=0, type=float, help="L2 weight decay")
    parser.add_argument("--show", action="store_true")
    parser.add_argument("--dsv", action="store_true")
    parser.add_argument("--balance", action="store_true")
    parser.add_argument("--only-buildings", action="store_true")
    parser.add_argument("--freeze-bn", action="store_true")
    parser.add_argument("--crops", action="store_true", help="Train on random crops")
    parser.add_argument("--post-transform", action="store_true")

    args = parser.parse_args()
    set_manual_seed(args.seed)

    data_dir = args.data_dir
    num_workers = args.workers
    num_epochs = args.epochs
    learning_rate = args.learning_rate
    model_name = args.model
    optimizer_name = args.optimizer
    image_size = args.size, args.size
    fast = args.fast
    augmentations = args.augmentations
    fp16 = args.fp16
    scheduler_name = args.scheduler
    experiment = args.experiment
    dropout = args.dropout
    segmentation_losses = args.criterion
    verbose = args.verbose
    show = args.show
    accumulation_steps = args.accumulation_steps
    weight_decay = args.weight_decay
    fold = args.fold
    balance = args.balance
    only_buildings = args.only_buildings
    freeze_bn = args.freeze_bn
    train_on_crops = args.crops
    enable_post_image_transform = args.post_transform
    disaster_type_loss = args.disaster_type_loss
    train_batch_size = args.batch_size
    embedding_criterion = args.embedding
    damage_type_loss = args.damage_type_loss
    pseudolabels_dir = args.pseudolabeling

    # Compute batch size for validation
    if train_on_crops:
        valid_batch_size = max(1, (train_batch_size * (image_size[0] * image_size[1])) // (1024**2))
    else:
        valid_batch_size = train_batch_size

    run_train = num_epochs > 0

    model: nn.Module = get_model(model_name, dropout=dropout).cuda()

    if args.transfer:
        transfer_checkpoint = fs.auto_file(args.transfer)
        print("Transferring weights from model checkpoint", transfer_checkpoint)
        checkpoint = load_checkpoint(transfer_checkpoint)
        pretrained_dict = checkpoint["model_state_dict"]
        transfer_weights(model, pretrained_dict)

    if args.checkpoint:
        checkpoint = load_checkpoint(fs.auto_file(args.checkpoint))
        unpack_checkpoint(checkpoint, model=model)
        print("Loaded model weights from:", args.checkpoint)
        report_checkpoint(checkpoint)

    if freeze_bn:
        torch_utils.freeze_bn(model)
        print("Freezing bn params")

    runner = SupervisedRunner(input_key=INPUT_IMAGE_KEY, output_key=None)
    main_metric = "weighted_f1"
    cmd_args = vars(args)

    current_time = datetime.now().strftime("%b%d_%H_%M")
    checkpoint_prefix = f"{current_time}_{args.model}_{args.size}_fold{fold}"

    if fp16:
        checkpoint_prefix += "_fp16"
    if fast:
        checkpoint_prefix += "_fast"
    if pseudolabels_dir:
        checkpoint_prefix += "_pseudo"
    if train_on_crops:
        checkpoint_prefix += "_crops"
    if experiment is not None:
        checkpoint_prefix = experiment

    log_dir = os.path.join("runs", checkpoint_prefix)
    os.makedirs(log_dir, exist_ok=False)

    config_fname = os.path.join(log_dir, f"{checkpoint_prefix}.json")
    with open(config_fname, "w") as f:
        train_session_args = vars(args)
        f.write(json.dumps(train_session_args, indent=2))

    default_callbacks = [
        CompetitionMetricCallback(input_key=INPUT_MASK_KEY, output_key=OUTPUT_MASK_KEY, prefix="weighted_f1"),
        ConfusionMatrixCallback(
            input_key=INPUT_MASK_KEY,
            output_key=OUTPUT_MASK_KEY,
            class_names=["land", "no_damage", "minor_damage", "major_damage", "destroyed"],
            ignore_index=UNLABELED_SAMPLE,
        ),
    ]

    if show:
        default_callbacks += [
            ShowPolarBatchesCallback(draw_predictions, metric=main_metric + "_batch", minimize=False)
        ]

    train_ds, valid_ds, train_sampler = get_datasets(
        data_dir=data_dir,
        image_size=image_size,
        augmentation=augmentations,
        fast=fast,
        fold=fold,
        balance=balance,
        only_buildings=only_buildings,
        train_on_crops=train_on_crops,
        crops_multiplication_factor=1,
        enable_post_image_transform=enable_post_image_transform,
    )

    if run_train:
        loaders = collections.OrderedDict()
        callbacks = default_callbacks.copy()
        criterions_dict = {}
        losses = []

        unlabeled_train = get_pseudolabeling_dataset(
            data_dir,
            include_masks=True,
            image_size=image_size,
            augmentation="medium_nmd",
            train_on_crops=train_on_crops,
            enable_post_image_transform=enable_post_image_transform,
            pseudolabels_dir=pseudolabels_dir,
        )

        train_ds = train_ds + unlabeled_train
        print("Using online pseudolabeling with ", len(unlabeled_train), "samples")

        loaders["train"] = DataLoader(
            train_ds,
            batch_size=train_batch_size,
            num_workers=num_workers,
            pin_memory=True,
            drop_last=True,
            shuffle=True,
        )
        loaders["valid"] = DataLoader(valid_ds, batch_size=valid_batch_size, num_workers=num_workers, pin_memory=True)

        # Create losses
        for criterion in segmentation_losses:
            if isinstance(criterion, (list, tuple)) and len(criterion) == 2:
                loss_name, loss_weight = criterion
            else:
                loss_name, loss_weight = criterion[0], 1.0

            cd, criterion, criterion_name = get_criterion_callback(
                loss_name,
                prefix="segmentation",
                input_key=INPUT_MASK_KEY,
                output_key=OUTPUT_MASK_KEY,
                loss_weight=float(loss_weight),
            )
            criterions_dict.update(cd)
            callbacks.append(criterion)
            losses.append(criterion_name)
            print(INPUT_MASK_KEY, "Using loss", loss_name, loss_weight)

        if args.mask4 is not None:
            for criterion in args.mask4:
                if isinstance(criterion, (list, tuple)):
                    loss_name, loss_weight = criterion
                else:
                    loss_name, loss_weight = criterion, 1.0

                cd, criterion, criterion_name = get_criterion_callback(
                    loss_name,
                    prefix="mask4",
                    input_key=INPUT_MASK_KEY,
                    output_key=OUTPUT_MASK_4_KEY,
                    loss_weight=float(loss_weight),
                )
                criterions_dict.update(cd)
                callbacks.append(criterion)
                losses.append(criterion_name)
                print(OUTPUT_MASK_4_KEY, "Using loss", loss_name, loss_weight)

        if args.mask8 is not None:
            for criterion in args.mask8:
                if isinstance(criterion, (list, tuple)):
                    loss_name, loss_weight = criterion
                else:
                    loss_name, loss_weight = criterion, 1.0

                cd, criterion, criterion_name = get_criterion_callback(
                    loss_name,
                    prefix="mask8",
                    input_key=INPUT_MASK_KEY,
                    output_key=OUTPUT_MASK_8_KEY,
                    loss_weight=float(loss_weight),
                )
                criterions_dict.update(cd)
                callbacks.append(criterion)
                losses.append(criterion_name)
                print(OUTPUT_MASK_8_KEY, "Using loss", loss_name, loss_weight)

        if args.mask16 is not None:
            for criterion in args.mask16:
                if isinstance(criterion, (list, tuple)):
                    loss_name, loss_weight = criterion
                else:
                    loss_name, loss_weight = criterion, 1.0

                cd, criterion, criterion_name = get_criterion_callback(
                    loss_name,
                    prefix="mask16",
                    input_key=INPUT_MASK_KEY,
                    output_key=OUTPUT_MASK_16_KEY,
                    loss_weight=float(loss_weight),
                )
                criterions_dict.update(cd)
                callbacks.append(criterion)
                losses.append(criterion_name)
                print(OUTPUT_MASK_16_KEY, "Using loss", loss_name, loss_weight)

        if args.mask32 is not None:
            for criterion in args.mask32:
                if isinstance(criterion, (list, tuple)):
                    loss_name, loss_weight = criterion
                else:
                    loss_name, loss_weight = criterion, 1.0

                cd, criterion, criterion_name = get_criterion_callback(
                    loss_name,
                    prefix="mask32",
                    input_key=INPUT_MASK_KEY,
                    output_key=OUTPUT_MASK_32_KEY,
                    loss_weight=float(loss_weight),
                )
                criterions_dict.update(cd)
                callbacks.append(criterion)
                losses.append(criterion_name)
                print(OUTPUT_MASK_32_KEY, "Using loss", loss_name, loss_weight)

        if disaster_type_loss is not None:
            callbacks += [
                ConfusionMatrixCallback(
                    input_key=DISASTER_TYPE_KEY,
                    output_key=DISASTER_TYPE_KEY,
                    class_names=DISASTER_TYPES,
                    ignore_index=UNKNOWN_DISASTER_TYPE_CLASS,
                    prefix=f"{DISASTER_TYPE_KEY}/confusion_matrix",
                ),
                AccuracyCallback(
                    input_key=DISASTER_TYPE_KEY,
                    output_key=DISASTER_TYPE_KEY,
                    prefix=f"{DISASTER_TYPE_KEY}/accuracy",
                    activation="Softmax",
                ),
            ]

            for criterion in disaster_type_loss:
                if isinstance(criterion, (list, tuple)):
                    loss_name, loss_weight = criterion
                else:
                    loss_name, loss_weight = criterion, 1.0

                cd, criterion, criterion_name = get_criterion_callback(
                    loss_name,
                    prefix=DISASTER_TYPE_KEY,
                    input_key=DISASTER_TYPE_KEY,
                    output_key=DISASTER_TYPE_KEY,
                    loss_weight=float(loss_weight),
                    ignore_index=UNKNOWN_DISASTER_TYPE_CLASS,
                )
                criterions_dict.update(cd)
                callbacks.append(criterion)
                losses.append(criterion_name)
                print(DISASTER_TYPE_KEY, "Using loss", loss_name, loss_weight)

        if damage_type_loss is not None:
            callbacks += [
                # MultilabelConfusionMatrixCallback(
                #     input_key=DAMAGE_TYPE_KEY,
                #     output_key=DAMAGE_TYPE_KEY,
                #     class_names=DAMAGE_TYPES,
                #     prefix=f"{DAMAGE_TYPE_KEY}/confusion_matrix",
                # ),
                AccuracyCallback(
                    input_key=DAMAGE_TYPE_KEY,
                    output_key=DAMAGE_TYPE_KEY,
                    prefix=f"{DAMAGE_TYPE_KEY}/accuracy",
                    activation="Sigmoid",
                    threshold=0.5,
                )
            ]

            for criterion in damage_type_loss:
                if isinstance(criterion, (list, tuple)):
                    loss_name, loss_weight = criterion
                else:
                    loss_name, loss_weight = criterion, 1.0

                cd, criterion, criterion_name = get_criterion_callback(
                    loss_name,
                    prefix=DAMAGE_TYPE_KEY,
                    input_key=DAMAGE_TYPE_KEY,
                    output_key=DAMAGE_TYPE_KEY,
                    loss_weight=float(loss_weight),
                )
                criterions_dict.update(cd)
                callbacks.append(criterion)
                losses.append(criterion_name)
                print(DAMAGE_TYPE_KEY, "Using loss", loss_name, loss_weight)

        if embedding_criterion is not None:
            cd, criterion, criterion_name = get_criterion_callback(
                embedding_criterion,
                prefix="embedding",
                input_key=INPUT_MASK_KEY,
                output_key=OUTPUT_EMBEDDING_KEY,
                loss_weight=1.0,
            )
            criterions_dict.update(cd)
            callbacks.append(criterion)
            losses.append(criterion_name)
            print(OUTPUT_EMBEDDING_KEY, "Using loss", embedding_criterion)

        callbacks += [
            CriterionAggregatorCallback(prefix="loss", loss_keys=losses),
            OptimizerCallback(accumulation_steps=accumulation_steps, decouple_weight_decay=False),
        ]

        optimizer = get_optimizer(optimizer_name, get_optimizable_parameters(model), learning_rate, weight_decay=weight_decay)
        scheduler = get_scheduler(scheduler_name, optimizer, lr=learning_rate, num_epochs=num_epochs, batches_in_epoch=len(loaders["train"]))
        if isinstance(scheduler, CyclicLR):
            callbacks += [SchedulerCallback(mode="batch")]

        print("Train session    :", checkpoint_prefix)
        print("  FP16 mode      :", fp16)
        print("  Fast mode      :", args.fast)
        print("  Epochs         :", num_epochs)
        print("  Workers        :", num_workers)
        print("  Data dir       :", data_dir)
        print("  Log dir        :", log_dir)
        print("Data             ")
        print("  Augmentations  :", augmentations)
        print("  Train size     :", len(loaders["train"]), len(train_ds))
        print("  Valid size     :", len(loaders["valid"]), len(valid_ds))
        print("  Image size     :", image_size)
        print("  Train on crops :", train_on_crops)
        print("  Balance        :", balance)
        print("  Buildings only :", only_buildings)
        print("  Post transform :", enable_post_image_transform)
        print("  Pseudolabels   :", pseudolabels_dir)
        print("Model            :", model_name)
        print("  Parameters     :", count_parameters(model))
        print("  Dropout        :", dropout)
        print("Optimizer        :", optimizer_name)
        print("  Learning rate  :", learning_rate)
        print("  Weight decay   :", weight_decay)
        print("  Scheduler      :", scheduler_name)
        print("  Batch sizes    :", train_batch_size, valid_batch_size)
        print("  Criterion      :", segmentation_losses)
        print("  Damage type    :", damage_type_loss)
        print("  Disaster type  :", disaster_type_loss)
        print("  Embedding      :", embedding_criterion)

        # model training
        runner.train(
            fp16=fp16,
            model=model,
            criterion=criterions_dict,
            optimizer=optimizer,
            scheduler=scheduler,
            callbacks=callbacks,
            loaders=loaders,
            logdir=os.path.join(log_dir, "opl"),
            num_epochs=num_epochs,
            verbose=verbose,
            main_metric=main_metric,
            minimize_metric=False,
            checkpoint_data={"cmd_args": cmd_args},
        )

        # Training is finished. Let's run predictions using best checkpoint weights
        best_checkpoint = os.path.join(log_dir, "main", "checkpoints", "best.pth")
        model_checkpoint = os.path.join(log_dir, "main", "checkpoints", f"{checkpoint_prefix}.pth")
        clean_checkpoint(best_checkpoint, model_checkpoint)

        del optimizer, loaders
def main(
    config_path,
    log_dir=None,
    experiment_name=None,
    dataset_dir=None,
    num_epochs=50,
    num_labels=20,
    batch_size=38,
    num_workers=4,
    val_batch_size_multiplier=2,
    lr=1e-3,
    scheduler=None,
    optimizer=None,
    weight_decay=0,
    class_weight=None,
    check=False,
    verbose=True,
    cudnn_benchmark=True
):
    if torch.cuda.is_available() and cudnn_benchmark:
        torch.backends.cudnn.benchmark = True

    # experiment setup
    logdir = log_dir + experiment_name

    # data
    train_dataset = LipreadingDataset(phase="train")
    valid_dataset = LipreadingDataset(phase="val")
    loaders = {
        "train": DataLoader(
            train_dataset,
            batch_size=batch_size,
            shuffle=True,
            num_workers=num_workers,
            drop_last=True),
        "valid": DataLoader(
            valid_dataset,
            batch_size=val_batch_size_multiplier * batch_size,
            shuffle=True,
            num_workers=num_workers,
            drop_last=False)
    }

    # model, criterion, optimizer
    model = LipNext()
    criterion = torch.nn.CrossEntropyLoss(weight=class_weight)
    optimizer = optimizer(model.parameters(), lr=lr, weight_decay=weight_decay)
    if scheduler:
        scheduler = scheduler(optimizer)

    # model runner
    device = torch.device("cpu")
    if torch.cuda.is_available():
        device = torch.device("cuda")
    runner = SupervisedRunner(device=device)

    # callbacks
    acc_callback = AccuracyCallback(accuracy_args=[1, 3])
    ckpt_callback = CheckpointCallbackV2(config_path=config_path)
    neg_mining_callback = NegativeMiningCallback()
    callbacks = [acc_callback, ckpt_callback, neg_mining_callback]

    # model training
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        callbacks=callbacks,
        loaders=loaders,
        logdir=logdir,
        main_metric="accuracy01",
        minimize_metric=False,
        num_epochs=num_epochs,
        verbose=verbose,
        check=check
    )
logdir = "./logs/effnet-b0" fp16_params = None # dict(opt_level="O1") runner = SupervisedRunner(device='cuda') runner.train( model=model, criterion=criterion, scheduler=scheduler, optimizer=optimizer, loaders=loaders, callbacks=[ # wAUC(), F1ScoreCallback(), AUCCallback(num_classes=4), AccuracyCallback(prefix='ACC'), OptimizerCallback(accumulation_steps=args.acc)], logdir=logdir, num_epochs=num_epochs, fp16=fp16_params, verbose=True ) if args.test > 0: test_preds_proba: Union[List, Iterable, np.ndarray] = [] model.eval() progress_bar_test = tqdm(test_dataset) with torch.no_grad(): for i, im in enumerate(progress_bar_test): inputs = im.to('cuda') # flip horizontal im = kornia.augmentation.F.hflip(inputs)
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[3, 8], gamma=0.3)

# model runner
runner = SupervisedRunner()

# model training
runner.train(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    loaders=loaders,
    callbacks=[
        AccuracyCallback(accuracy_args=[1, 3, 5]),
        EarlyStoppingCallback(patience=2, min_delta=0.01),
    ],
    logdir=logdir,
    num_epochs=num_epochs,
    check=True,
)


# In[ ]:


# utils.plot_metrics(
#     logdir=logdir,
#     metrics=["loss", "accuracy01", "accuracy03", "_base/lr"])


# # Setup 5 - training with 1cycle
loaders = OrderedDict()
loaders["train"] = train_dl
loaders["valid"] = valid_dl

# model
model = AttentionModel(INPUT_DIM, HID_DIM, OUTPUT_DIM, RECURRENT_Layers, DROPOUT).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, [20, 60])
criterion = torch.nn.CrossEntropyLoss()

# model training
runner = SupervisedRunner()
runner.train(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    loaders=loaders,
    logdir=logdir,
    num_epochs=EPOCHS,
    verbose=True,
    callbacks=[
        AccuracyCallback(num_classes=5, topk_args=[1, 2]),
        EarlyStoppingCallback(metric='accuracy01', minimize=False, patience=10)
    ],
)
def main():
    args = get_args()
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpus

    SEED = 42
    utils.set_global_seed(SEED)
    utils.prepare_cudnn(deterministic=True)

    num_classes = 14

    # define datasets
    train_dataset = ChestXrayDataSet(
        data_dir=args.path_to_images,
        image_list_file=args.train_list,
        transform=transforms_train,
    )
    val_dataset = ChestXrayDataSet(
        data_dir=args.path_to_images,
        image_list_file=args.val_list,
        transform=transforms_val,
    )

    loaders = {
        'train': DataLoader(train_dataset,
                            batch_size=args.batch_size,
                            shuffle=True,
                            num_workers=args.num_workers),
        'valid': DataLoader(val_dataset,
                            batch_size=2,
                            shuffle=False,
                            num_workers=args.num_workers)
    }

    logdir = args.log_dir  # where model weights and logs are stored

    # define model
    model = DenseNet121(num_classes)
    if len(args.gpus) > 1:
        model = nn.DataParallel(model)

    device = utils.get_device()
    runner = SupervisedRunner(device=device)

    optimizer = RAdam(model.parameters(), lr=args.lr, weight_decay=0.0003)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.25, patience=2)

    weights = torch.Tensor(
        [10, 100, 30, 8, 40, 40, 330, 140, 35, 155, 110, 250, 155, 200]).to(device)
    criterion = BCEWithLogitsLoss(pos_weight=weights)

    class_names = [
        'Atelectasis', 'Cardiomegaly', 'Effusion', 'Infiltration', 'Mass',
        'Nodule', 'Pneumonia', 'Pneumothorax', 'Consolidation', 'Edema',
        'Emphysema', 'Fibrosis', 'Pleural_Thickening', 'Hernia'
    ]

    runner.train(
        model=model,
        logdir=logdir,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=loaders,
        num_epochs=args.epochs,
        # We can specify the callbacks list for the experiment;
        # For this task, we will check AUC and accuracy
        callbacks=[
            AUCCallback(
                input_key="targets",
                output_key='logits',
                prefix='auc',
                class_names=class_names,
                num_classes=num_classes,
                activation='Sigmoid',
            ),
            AccuracyCallback(
                input_key="targets",
                output_key="logits",
                prefix="accuracy",
                accuracy_args=[1],
                num_classes=14,
                threshold=0.5,
                activation='Sigmoid',
            ),
        ],
        main_metric='auc/_mean',
        minimize_metric=False,
        verbose=True,
    )
def train_model(
    df_train,
    df_valid,
    model_class,
    model_params,
    vectorizer,
    general_params,
):
    vectorizer = copy.deepcopy(vectorizer)
    vectorizer.fit(df_train["text"])

    df_train = make_df(df_train, vectorizer)
    train_ds = GeneralDataset(
        df_train["tokens"].values,
        labels=df_train["label"].values,
        max_sentence_len=general_params["max_sentence_len"],
    )
    trainloader = DataLoader(
        dataset=train_ds,
        batch_size=general_params["batch_size"],
        shuffle=True,
        num_workers=general_params["num_workers"],
    )

    df_valid = make_df(df_valid, vectorizer)
    valid_ds = GeneralDataset(
        df_valid["tokens"].values,
        labels=df_valid["label"].values,
        max_sentence_len=general_params["max_sentence_len"],
    )
    validloader = DataLoader(
        dataset=valid_ds,
        batch_size=general_params["batch_size"],
        shuffle=False,
        num_workers=general_params["num_workers"],
    )

    loaders = collections.OrderedDict()
    loaders["train"] = trainloader
    loaders["valid"] = validloader

    model_params = copy.deepcopy(model_params)
    model_params.update({"vocab_size": len(vectorizer.vocabulary_)})
    model = model_class(**model_params).float()

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), general_params["lr"])

    runner = SupervisedRunner()
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        loaders=loaders,
        callbacks=[
            AccuracyCallback(),
            EarlyStoppingCallback(patience=general_params["patience"],
                                  metric="accuracy01",
                                  minimize=False),
        ],
        logdir=general_params["logdir"],
        num_epochs=general_params["num_epochs"],
        main_metric="accuracy01",
        minimize_metric=False,
        load_best_on_end=True,
        verbose=False,
    )

    with open(os.path.join(general_params["logdir"], "vectorizer.pickle"), "wb") as output_file:
        pickle.dump(vectorizer, output_file)
def main():
    cifar_train = CIFAR10('.',
                          train=True,
                          transform=transforms.Compose([
                              transforms.Resize((224, 224)),
                              transforms.ToTensor()
                          ]),
                          download=True)
    cifar_test = CIFAR10('.',
                         train=False,
                         transform=transforms.Compose([
                             transforms.Resize((224, 224)),
                             transforms.ToTensor()
                         ]),
                         download=True)

    dl_train = DataLoader(cifar_train, batch_size=16)
    dl_test = DataLoader(cifar_test, batch_size=16)

    logdir = "./logdir/Adam"
    num_epochs = 10
    loaders = {'train': dl_train, 'valid': dl_test}

    model = resnet34()
    for name, param in model.named_parameters():
        param.requires_grad = True
    model.train()

    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters())

    runner = dl.SupervisedRunner()
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        loaders=loaders,
        num_epochs=num_epochs,
        verbose=True,
        logdir=logdir,
        callbacks=[
            logger.TensorboardLogger(),
            AccuracyCallback(num_classes=10)
        ],
    )

    logdir = "./logdir/AdamW"
    model.apply(init_weights)
    optimizer = AdamW(model.parameters())  # the optimizer needs the model's parameters
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        loaders=loaders,
        num_epochs=num_epochs,
        verbose=True,
        logdir=logdir,
        callbacks=[
            logger.TensorboardLogger(),
            AccuracyCallback(num_classes=10)
        ],
    )

    logdir = "./logdir/RAdam"
    model.apply(init_weights)
    optimizer = RAdam(model.parameters())  # the optimizer needs the model's parameters
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        loaders=loaders,
        num_epochs=num_epochs,
        verbose=True,
        logdir=logdir,
        callbacks=[
            logger.TensorboardLogger(),
            AccuracyCallback(num_classes=10)
        ],
    )
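# init_weights is referenced above but not defined in the snippet. A minimal sketch of a
# re-initialization helper compatible with `model.apply(...)` is given below; the choice
# of Kaiming initialization is an assumption for illustration, not the original author's
# code.
import torch.nn as nn


def init_weights(m: nn.Module) -> None:
    """Re-initialize Conv2d and Linear layers in place; used via model.apply(init_weights)."""
    if isinstance(m, (nn.Conv2d, nn.Linear)):
        nn.init.kaiming_normal_(m.weight, nonlinearity="relu")
        if m.bias is not None:
            nn.init.zeros_(m.bias)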
valid_dl = BaseDataLoader(valid_dataset,
                          batch_size=BATCH_SIZE,
                          shuffle=False,
                          num_workers=4)

# experiment setup
num_epochs = EPOCHS
logdir = LOGDIR
loaders = {"train": train_dl, "valid": valid_dl}

criterion = torch.nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=LR)
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[15, 19, 22])

callbacks = [
    AccuracyCallback(num_classes=2, activation='Sigmoid', threshold=0.5),
]

runner = SupervisedRunner()

# Train
runner.train(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    callbacks=callbacks,
    loaders=loaders,
    logdir=logdir,
    num_epochs=num_epochs,
    verbose=1,
    scheduler=scheduler,
    # main_metric='f1_score',
)
scheduler = torch.optim.lr_scheduler.MultiStepLR(
    optimizer, milestones=[3, 8], gamma=0.3
)

# model runner
runner = SupervisedRunner()

# model training
runner.train(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    loaders=loaders,
    callbacks=[
        AccuracyCallback(topk_args=[1, 3, 5]),
        EarlyStoppingCallback(patience=2, min_delta=0.01),
    ],
    logdir=logdir,
    num_epochs=num_epochs,
    check=True,
)


# In[ ]:


# utils.plot_metrics(
#     logdir=logdir,
#     metrics=["loss", "accuracy01", "accuracy03", "_base/lr"])


# # Setup 5 - training with 1cycle
"train": train_dl, "valid": valid_dl } criterion = torch.nn.CrossEntropyLoss() optimizer = torch.optim.SGD([ {'params': model.layer1.parameters(), 'lr': LR / 10}, {'params': model.layer2.parameters(), 'lr': LR / 5}, {'params': model.layer3.parameters(), 'lr': LR / 2}, {'params': model.layer4.parameters(), 'lr': LR / 1}, ], lr=LR) # scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr=[LR / 10, LR / 5, LR / 2, LR / 1], total_steps=100) scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10, eta_min=1e-7) # scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.5, cooldown=2, min_lr=1e-7) callbacks = [ AccuracyCallback(num_classes=5, threshold=0.5, activation='Softmax'), F1ScoreCallback(input_key="targets_one_hot", activation='Softmax', threshold=0.5), ] runner = SupervisedRunner() ## Step 1. runner.train( model=model, criterion=criterion, optimizer=optimizer, callbacks=callbacks, loaders=loaders, logdir=logdir, num_epochs=num_epochs, verbose=1,
def main():
    train_dataset = dataset.SentimentDataset(
        texts=df_train['sentences'].values.tolist(),
        labels=df_train['labels'].values,
        max_seq_length=config.MAX_SEQ_LENGTH,
        model_name=config.MODEL_NAME)
    valid_dataset = dataset.SentimentDataset(
        texts=df_valid['sentences'].values.tolist(),
        labels=df_valid['labels'].values,
        max_seq_length=config.MAX_SEQ_LENGTH,
        model_name=config.MODEL_NAME)

    train_val_loaders = {
        "train": DataLoader(dataset=train_dataset,
                            batch_size=config.BATCH_SIZE,
                            shuffle=True,
                            num_workers=2,
                            pin_memory=True),
        "valid": DataLoader(dataset=valid_dataset,
                            batch_size=config.BATCH_SIZE,
                            shuffle=False,
                            num_workers=2,
                            pin_memory=True)
    }

    dBert = model.DistilBert()

    param_optim = list(dBert.named_parameters())
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']

    criterion = nn.CrossEntropyLoss()
    base_optimizer = RAdam([
        {
            'params': [p for n, p in param_optim if not any(nd in n for nd in no_decay)],
            'weight_decay': config.WEIGHT_DECAY
        },
        {
            'params': [p for n, p in param_optim if any(nd in n for nd in no_decay)],
            'weight_decay': 0.0
        }
    ])
    optimizer = Lookahead(base_optimizer)
    scheduler = OneCycleLRWithWarmup(
        optimizer,
        num_steps=config.NUM_EPOCHS,
        lr_range=(config.LEARNING_RATE, 1e-8),
        init_lr=config.LEARNING_RATE,
        warmup_steps=0,
    )

    runner = SupervisedRunner(input_key=("input_ids", "attention_mask"))

    # model training
    runner.train(model=dBert,
                 criterion=criterion,
                 optimizer=optimizer,
                 scheduler=scheduler,
                 loaders=train_val_loaders,
                 callbacks=[
                     AccuracyCallback(num_classes=2),
                     OptimizerCallback(accumulation_steps=config.ACCUM_STEPS),
                 ],
                 fp16=config.FP_16,
                 logdir=config.LOG_DIR,
                 num_epochs=config.NUM_EPOCHS,
                 verbose=True)
def get_callbacks(num_classes):
    callbacks = [
        AccuracyCallback(num_classes=num_classes),
        F1ScoreCallback(input_key="targets_one_hot", activation="Softmax")
    ]
    return callbacks
def objective(trial):
    logdir = "/clusterdata/uqyzha77/Log/vic/"
    num_epochs = 100
    INPUT_DIM = 1
    OUTPUT_DIM = 5
    BATCH_SIZE = 64  # change here for multi gpu training 16*4=64
    num_classes = 5
    num_gpu = 1
    lr = trial.suggest_loguniform("lr", 1e-3, 1e-1)

    # generate dataloader
    data_path = '/afm02/Q2/Q2067/MoDS/Dabang_Sheng/Data/VIC_ready2use150000_yz_filtered80210.csv'
    df_all = pd.read_csv(data_path)
    labels = df_all.iloc[:, 4].copy()
    columns_name = list(range(0, 276))
    df2 = pd.DataFrame(df_all['VI_values'].str.slice(
        1, -1).str.split().values.tolist(), columns=columns_name, dtype=float)
    X = df2
    y = labels

    le = LabelEncoder()
    le.fit(y)
    print(le.classes_)
    class_names = le.classes_
    y = le.transform(y)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=SEED, stratify=y)
    X_train_resampled, y_train_resampled = X_train, y_train

    unique_elements, counts_elements = np.unique(y_train, return_counts=True)
    weights = [1 / i for i in counts_elements]
    weights[2] = weights[2] / 15
    print(np.asarray((unique_elements, counts_elements)))
    print(weights)

    samples_weight = np.array([weights[t] for t in y_train])
    samples_weights = torch.FloatTensor(samples_weight).to(device)
    class_weights = torch.FloatTensor(weights).to(device)
    sampler = torch.utils.data.sampler.WeightedRandomSampler(
        samples_weights, len(X_train_resampled), replacement=True)

    # prepare PyTorch Datasets
    X_train_tensor = numpy_to_tensor(X_train_resampled.to_numpy(), torch.FloatTensor)
    y_train_tensor = numpy_to_tensor(y_train_resampled, torch.long)
    X_test_tensor = numpy_to_tensor(X_test.to_numpy(), torch.FloatTensor)
    y_test_tensor = numpy_to_tensor(y_test, torch.long)

    X_train_tensor = torch.unsqueeze(X_train_tensor, 2)
    X_test_tensor = torch.unsqueeze(X_test_tensor, 2)

    train_ds = TensorDataset(X_train_tensor, y_train_tensor)
    valid_ds = TensorDataset(X_test_tensor, y_test_tensor)
    train_dl = DataLoader(train_ds,
                          batch_size=BATCH_SIZE,
                          sampler=sampler,
                          drop_last=True,
                          num_workers=0)
    valid_dl = DataLoader(valid_ds,
                          batch_size=BATCH_SIZE,
                          shuffle=False,
                          drop_last=True,
                          num_workers=0)

    # Catalyst loader:
    loaders = OrderedDict()
    loaders["train"] = train_dl
    loaders["valid"] = valid_dl

    # model
    model = AttentionModel(trial, BATCH_SIZE // num_gpu, INPUT_DIM, OUTPUT_DIM).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, [20, 40, 60])
    criterion = torch.nn.CrossEntropyLoss()

    # model training
    runner = SupervisedRunner()
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=loaders,
        logdir=logdir,
        num_epochs=num_epochs,
        verbose=True,
        callbacks=[
            AccuracyCallback(num_classes=num_classes),
            CatalystPruningCallback(
                trial,
                metric="accuracy01"),  # top-1 accuracy as metric for pruning
        ],
    )

    return runner.state.valid_metrics["accuracy01"]
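# The objective above is meant to be driven by an Optuna study. A hedged usage sketch is
# below; the pruner choice and the number of trials are illustrative assumptions, not
# taken from the original script.
import optuna

if __name__ == "__main__":
    study = optuna.create_study(
        direction="maximize",  # accuracy01 should be maximized
        pruner=optuna.pruners.MedianPruner(),
    )
    study.optimize(objective, n_trials=20)
    print("Best trial:", study.best_trial.params)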
fp16_params = dict(opt_level="O1") # params for FP16 else: fp16_params = None runner = SupervisedRunner(device=device) runner.train( model=model, criterion=criterion, optimizer=optimizer, scheduler=scheduler, loaders=loaders, # We can specify the callbacks list for the experiment; # For this task, we will check accuracy, AUC and F1 metrics callbacks=[ AccuracyCallback(num_classes=config.num_classes), AUCCallback( num_classes=config.num_classes, input_key="targets_one_hot", class_names=config.class_names ), F1ScoreCallback( input_key="targets_one_hot", activation="Softmax" ), CheckpointCallback( save_n_best=1, # resume_dir="./models/classification", metrics_filename="metrics.json" ), EarlyStoppingCallback(
set_global_seed(params["general"]["seed"])
prepare_cudnn(deterministic=True)

# here we specify that we pass masks to the runner. So model's forward method will be
# called with these arguments passed to it.
runner = SupervisedRunner(input_key=("features", "attention_mask"))

# finally, training the model with Catalyst
runner.train(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    loaders=train_val_loaders,
    callbacks=[
        AccuracyCallback(num_classes=int(params["model"]["num_classes"])),
        OptimizerCallback(
            accumulation_steps=int(params["training"]["accum_steps"])),
    ],
    logdir=params["training"]["log_dir"],
    num_epochs=int(params["training"]["num_epochs"]),
    verbose=True,
)

# and running inference
torch.cuda.empty_cache()
runner.infer(
    model=model,
    loaders=test_loaders,
    callbacks=[
        CheckpointCallback(
def fit(self,
        train_df,
        dev_df,
        batch_size=16,
        max_seq_length=256,
        learning_rate=5e-5,
        epochs=1,
        log_dir=None,
        verbose=False):
    start = time.time()

    config = {
        "model_name": self.model_name,
        "batch_size": batch_size,
        "max_seq_length": max_seq_length,
        "learning_rate": learning_rate,
        "epochs": epochs,
        "log_dir": log_dir
    }

    train_y = train_df[0]
    train_X = train_df[1]
    label2id = dict(
        zip(sorted(set(train_y)), range(len(set(train_y))))
    )
    self.id2label = {v: k for k, v in label2id.items()}
    num_labels = len(label2id)

    self.train_data = ClassificationDataset(
        tokenizer=self.tokenizer,
        label2id=label2id,
        max_seq_length=max_seq_length,
        texts=train_X,
        labels=train_y
    )

    dev_y = dev_df[0]
    dev_X = dev_df[1]
    self.dev_data = ClassificationDataset(
        tokenizer=self.tokenizer,
        label2id=label2id,
        max_seq_length=max_seq_length,
        texts=dev_X,
        labels=dev_y
    )

    train_dev_loaders = {
        "train": DataLoader(
            dataset=self.train_data,
            batch_size=batch_size,
            shuffle=True
        ),
        "valid": DataLoader(
            dataset=self.dev_data,
            batch_size=batch_size,
            shuffle=False
        )
    }

    model = BERTBaseJapaneseModel(self.model_name, num_labels)
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer)

    self.runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=train_dev_loaders,
        callbacks=[
            AccuracyCallback(num_classes=num_labels),
        ],
        fp16=None,
        logdir=log_dir,
        num_epochs=epochs,
        verbose=verbose
    )

    self.elapsed_time = time.time() - start
    config["elapsed_time"] = self.elapsed_time

    if os.path.exists(f"{log_dir}/checkpoints"):
        filename = f"{log_dir}/checkpoints/config.pkl"
        with open(filename, "wb") as f:
            pickle.dump([label2id, config], f)
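# A hedged usage sketch for the fit method above: it expects train_df and dev_df to be
# indexable so that element 0 holds the labels and element 1 holds the texts (for example
# a pandas DataFrame read without a header, with label and text in the first two columns).
# The classifier class name, file names and column layout below are assumptions for
# illustration, not part of the original snippet.
# import pandas as pd
# train_df = pd.read_csv("train.tsv", sep="\t", header=None)  # column 0: label, column 1: text
# dev_df = pd.read_csv("dev.tsv", sep="\t", header=None)
# clf = BERTBaseJapaneseClassifier()
# clf.fit(train_df, dev_df, batch_size=16, epochs=3, log_dir="./logdir", verbose=True)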
        momentum_range=(0.85, 0.95),
    )
else:
    step = len(range(0, args.num_epochs, 4))
    milestones = [step * i for i in range(1, 4)]
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=milestones, gamma=0.1)

runner = SupervisedRunner(input_key='features', output_key=['embeddings', 'logits'])

callbacks = [
    AccuracyCallback(
        num_classes=args.num_classes,
        accuracy_args=[1],
        activation="Softmax",
    ),
    CriterionCallback(input_key="targets", prefix="loss", criterion_key="ce"),
]

if args.triplet_loss:
    callbacks.extend([
        CriterionCallback(input_key="targets",
                          output_key="embeddings",
                          prefix="loss",
                          criterion_key="htl"),
        CriterionAggregatorCallback(prefix="loss",
                                    loss_keys=["ce", "htl"],
                                    loss_aggregate_fn="sum")
    ])