def get_lightning_data_module(config):
    # Get your data, preprocess, and create the LightningDataModule
    train, val, test = get_data()

    ldm = PLDataModuleFromDatasets(
        train, val=val, test=test, seed=config.seed, **config.data
    )

    return ldm
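# Illustration (assumption, not from the source): the **config.data expansion above passes
# the "data" section of the config straight to PLDataModuleFromDatasets as keyword
# arguments, so that section controls batching and DataLoader workers. The keys shown here
# (batch_size, batch_size_eval, num_workers, pin_memory) appear in the trainables below;
# the exact set accepted depends on PLDataModuleFromDatasets' signature.
example_config = OmegaConf.create(
    {
        "seed": 42,
        "data": {
            "batch_size": 32,
            "batch_size_eval": 32,
            "num_workers": 2,
            "pin_memory": True,
        },
    }
)
ldm = get_lightning_data_module(example_config)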
def train_mnist(config, train=None, val=None):
    # Convert the plain config dict to an OmegaConf DictConfig object
    config = OmegaConf.create(config)

    # Create data module
    ldm = PLDataModuleFromDatasets(
        train, val=val, seed=config.seed, no_test_set=True, **config.data
    )

    # Create model, optimizer, criterion, scheduler
    model = Net(**config.model)

    optimizer = getattr(optim, config.optimizer)(model.parameters(), **config.optim)
    criterion = nn.CrossEntropyLoss()

    lr_scheduler = None
    if config.lr_scheduler:
        lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(
            optimizer, **config.lr_schedule
        )

    # Wrap in PLModule and configure metrics
    lm = PLModule(
        model,
        optimizer,
        criterion,
        lr_scheduler=lr_scheduler,
        metrics={
            "acc": FromLogits(pl.metrics.classification.Accuracy())
        },  # Will log train_acc and val_acc
        hparams=config,
    )

    # Map Lightning metrics to Ray Tune metrics
    metrics_map = {"accuracy": "val_acc", "validation_loss": "val_loss"}
    assert (
        config["tune"]["metric"] in metrics_map.keys()
    ), "Metrics mapping should contain the metric you are trying to optimize"

    # Train model
    trainer = make_trainer_for_ray_tune(metrics_map=metrics_map, **config.trainer)

    trainer.fit(lm, datamodule=ldm)
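# Launch sketch (assumption, not from the source): train_mnist has the (config, train, val)
# signature that ray.tune.with_parameters expects, so a search could be started roughly as
# below. It assumes get_data() returns (train, val, test) as in get_lightning_data_module
# above, that make_trainer_for_ray_tune reports the metrics_map keys back to Tune, and the
# search-space values and resources are illustrative only.
from ray import tune

def hypothetical_tune_mnist():
    train, val, _test = get_data()
    search_space = {
        "optim": {"lr": tune.loguniform(1e-4, 1e-1)},
        "data": {"batch_size": tune.choice([32, 64, 128])},
        # ... remaining keys as in the static config ...
    }
    analysis = tune.run(
        tune.with_parameters(train_mnist, train=train, val=val),
        config=search_space,
        metric="accuracy",  # must be a key of metrics_map above
        mode="max",
        num_samples=10,
        resources_per_trial={"cpu": 2, "gpu": 0},
    )
    print(analysis.best_config)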
if config.trainer.experiment_name == "experiment":
    config.trainer.experiment_name = "mnist-rnn-classification"

configure_logging(f"logs/{config.trainer.experiment_name}")

if config.seed is not None:
    logger.info(f"Seeding everything with seed={config.seed}")
    pl.utilities.seed.seed_everything(seed=config.seed)

# Get data and make datamodule
train, test = get_data()

ldm = PLDataModuleFromDatasets(
    train, test=test, seed=config.seed, collate_fn=collate_fn, **config.data
)

# Create model, optimizer, criterion, scheduler
model = Net(28, **config.model)

optimizer = getattr(optim, config.optimizer)(model.parameters(), **config.optim)
criterion = nn.CrossEntropyLoss()

lr_scheduler = None
if config.lr_scheduler:
    lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, **config.lr_schedule
    )
def train_mosei(config, train=None, val=None):
    # Convert the plain config dict to an OmegaConf DictConfig object
    config = OmegaConf.create(config)

    modalities = set(config.modalities)

    train, val, _ = get_data(
        remove_pauses=config.preprocessing.remove_pauses,
        pad=config.preprocessing.pad,
        max_length=config.preprocessing.max_length,
        modalities=modalities,
    )

    collate_fn = MultimodalSequenceClassificationCollator(
        device="cpu", modalities=modalities
    )

    # Create data module
    config.data.batch_size_eval = config.data.batch_size

    ldm = PLDataModuleFromDatasets(
        train,
        val=val,
        no_test_set=True,
        batch_size=config.data.batch_size,
        batch_size_eval=config.data.batch_size_eval,
        collate_fn=collate_fn,
        pin_memory=config.data.pin_memory,
        num_workers=config.data.num_workers,
    )

    # Create model, optimizer, criterion, scheduler
    model = TransformerLateFusionClassifier(
        config.model.feature_sizes,
        1,
        max_length=1024,
        nystrom=False,
        kernel_size=config.model.kernel_size,
        num_layers=config.model.num_layers,
        num_heads=config.model.num_heads,
        dropout=config.model.dropout,
        hidden_size=config.model.hidden_size,
        inner_size=config.model.inner_size_multiple * config.model.hidden_size,
        prenorm=False,
        scalenorm=config.model.scalenorm,
        multi_modal_drop=config.model.multi_modal_drop,
        p_mmdrop=config.model.p_mmdrop,
        # p_drop_modalities=config.model.p_drop_modalities,
    )

    optimizer = getattr(optim, config.optimizer)(model.parameters(), **config.optim)
    criterion = nn.L1Loss()

    lr_scheduler = None
    if config.lr_scheduler:
        lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(
            optimizer, **config.lr_schedule
        )

    # Wrap in the PL module and configure the CMU-MOSEI metrics
    lm = MultimodalTransformerClassificationPLModule(
        model,
        optimizer,
        criterion,
        lr_scheduler=lr_scheduler,
        hparams=config,
        metrics={
            "acc2": MoseiAcc2(exclude_neutral=True),
            "acc2_zero": MoseiAcc2(exclude_neutral=False),
            "acc5": MoseiAcc5(),
            "acc7": MoseiAcc7(),
            "f1": MoseiF1(exclude_neutral=True),
            "f1_zero": MoseiF1(exclude_neutral=False),
            "mae": torchmetrics.MeanAbsoluteError(),
        },
    )

    # Map Lightning metrics to Ray Tune metrics
    metrics_map = {
        "validation_loss": "best_score",
        "val_accuracy": "acc2",
        "val_f1": "f1",
        "acc5": "acc5",
        "acc7": "acc7",
    }
    assert (
        config["tune"]["metric"] in metrics_map.keys()
    ), "Metrics mapping should contain the metric you are trying to optimize"

    # Train model
    trainer = make_trainer_for_ray_tune(metrics_map=metrics_map, **config.trainer)

    trainer.fit(lm, datamodule=ldm)
def train_mosi(config, train=None, val=None):
    # Convert the plain config dict to an OmegaConf DictConfig object
    config = OmegaConf.create(config)

    train, val, _ = get_data(
        remove_pauses=config.preprocessing.remove_pauses,
        pad=config.preprocessing.pad,
        max_length=config.preprocessing.max_length,
    )

    collate_fn = MultimodalSequenceClassificationCollator(device="cpu")

    # Create data module
    config.data.batch_size_eval = config.data.batch_size

    ldm = PLDataModuleFromDatasets(
        train,
        val=val,
        seed=config.seed,
        no_test_set=True,
        collate_fn=collate_fn,
        **config.data
    )

    feature_sizes = {"audio": 74, "visual": 35, "text": 300}

    # Create model, optimizer, criterion, scheduler
    model = TransformerLateFusionClassifier(
        feature_sizes,
        1,
        max_length=512,
        nystrom=False,
        kernel_size=config.model.kernel_size,
        num_layers=config.model.num_layers,
        num_heads=config.model.num_heads,
        dropout=config.model.dropout,
        hidden_size=config.model.hidden_size,
        inner_size=config.model.inner_size_multiple * config.model.hidden_size,
        # inner_size=config.model.inner_size,
        prenorm=config.model.prenorm,
        scalenorm=config.model.scalenorm,
    )

    optimizer = getattr(optim, config.optimizer)(model.parameters(), **config.optim)
    criterion = nn.MSELoss()

    lr_scheduler = None
    if config.lr_scheduler:
        lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(
            optimizer, **config.lr_schedule
        )

    lm = MultimodalTransformerClassificationPLModule(
        model,
        optimizer,
        criterion,
        lr_scheduler=lr_scheduler,
        hparams=config,
    )

    # Map Lightning metrics to Ray Tune metrics
    metrics_map = {"validation_loss": "best_score"}
    assert (
        config["tune"]["metric"] in metrics_map.keys()
    ), "Metrics mapping should contain the metric you are trying to optimize"

    # Train model
    trainer = make_trainer_for_ray_tune(metrics_map=metrics_map, **config.trainer)

    trainer.fit(lm, datamodule=ldm)
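# Launch sketch (assumption, not from the source): the multimodal trainables above report
# "validation_loss" (mapped from the Lightning-logged "best_score") to Ray Tune, so a search
# over them would optimize that key with mode="min". ASHAScheduler is Ray Tune's standard
# early-stopping scheduler; the hyperparameter ranges below are illustrative only, and the
# remaining config keys would be filled in as in the static config.
from ray import tune
from ray.tune.schedulers import ASHAScheduler

def hypothetical_tune_mosi():
    analysis = tune.run(
        train_mosi,  # train/val default to None and are loaded inside the trainable
        config={
            "model": {
                "num_layers": tune.choice([2, 4, 6]),
                "dropout": tune.uniform(0.1, 0.4),
                # ... remaining model/data/trainer keys as in the static config ...
            },
        },
        metric="validation_loss",  # must be a key of metrics_map above
        mode="min",
        scheduler=ASHAScheduler(max_t=20, grace_period=5),
        num_samples=20,
    )
    print(analysis.best_config)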