def get_wandb_logger(name: Optional[str] = None):
    """Build the project's WandbLogger.

    Runs offline when no ``WANDB_API_KEY`` is present in the environment;
    the project name can be overridden via ``WANDB_PROJECT``.
    """
    has_api_key = "WANDB_API_KEY" in os.environ
    project = os.environ.get(
        "WANDB_PROJECT", "osic-pulmonary-fibrosis-progression"
    )
    return WandbLogger(
        name=name,
        offline=not has_api_key,
        project=project,
        log_model=True,
    )
def main(hparams):
    """Train TransformerMarco with pytorch-lightning.

    Sets up optional W&B and TensorBoard loggers, checkpointing on the
    validation loss, and a (possibly distributed) Trainer, then fits the model.

    Args:
        hparams: argparse-style namespace providing use_wandb, use_tensorboard,
            slurm_job_id, gpus, num_nodes, distributed_backend,
            trainer_batch_size, epochs, num_training_steps and
            val_check_interval.
    """
    model = TransformerMarco(hparams)

    loggers = []
    if hparams.use_wandb:
        wandb_logger = WandbLogger(project='long-marco',
                                   entity='usiir',
                                   name=f'Albert-passage-{hparams.slurm_job_id}')
        wandb_logger.log_hyperparams(hparams)
        loggers.append(wandb_logger)
    if hparams.use_tensorboard:
        # FIX: dropped the pointless f-prefix (f-string with no placeholders,
        # ruff F541); the resulting run name is unchanged.
        tb_logger = TensorBoardLogger("tb_logs",
                                      name="Longformer-docs",
                                      version=hparams.slurm_job_id)
        loggers.append(tb_logger)

    # Keep the 3 best checkpoints, ranked by validation epoch loss.
    checkpoint_callback = ModelCheckpoint(
        filepath=os.path.join(os.getcwd(), 'checkpoints'),
        save_top_k=3,
        verbose=True,
        monitor='val_epoch_loss',
        mode='min',
        prefix=''
    )

    # This Trainer handles most of the stuff.
    # Enables distributed training with one line:
    # https://towardsdatascience.com/trivial-multi-node-training-with-pytorch-lightning-ff75dfb809bd
    trainer = pl.Trainer(
        gpus=hparams.gpus,
        num_nodes=hparams.num_nodes,
        distributed_backend=hparams.distributed_backend,
        # control the effective batch size with this param
        accumulate_grad_batches=hparams.trainer_batch_size,
        # Training will stop if max_steps or max_epochs have reached (earliest).
        max_epochs=hparams.epochs,
        max_steps=hparams.num_training_steps,
        logger=loggers,
        checkpoint_callback=checkpoint_callback,
        # progress_bar_callback=False,
        # progress_bar_refresh_rate=0,
        # use_amp=True --> use 16bit precision
        # val_check_interval=0.25,  # val 4 times during 1 train epoch
        val_check_interval=hparams.val_check_interval,  # val every N steps
        # num_sanity_val_steps=5,
        # fast_dev_run=True
    )
    trainer.fit(model)
def get_logger(cls, save_dir: str, version=0, offline=False, anonymous=False,
               project=None, log_model=True, **__) -> LightningLoggerBase:
    """Construct a WandbLogger; unrecognized keyword arguments are ignored.

    The version is always stringified before being handed to the logger.
    """
    options = dict(
        save_dir=save_dir,
        version=str(version),
        offline=offline,
        anonymous=anonymous,
        project=project,
        log_model=log_model,
    )
    return WandbLogger(**options)
def build_trainer(
    self,
    trainer_params: Dict,
    callback_list: List[Callback],
    logger_dict: Dict,
    checkpoint_dict: Dict,
    device: Optional[Union[str, int]] = None,
) -> Trainer:
    """Assemble a pytorch-lightning Trainer with a W&B logger and checkpointing.

    Args:
        trainer_params: keyword arguments forwarded to ``Trainer``.
        callback_list: callbacks to attach; the checkpoint callback is appended.
        logger_dict: keyword arguments for ``WandbLogger``.
        checkpoint_dict: keyword arguments for ``ModelCheckpoint``.
        device: GPU spec — an int, a JSON string (e.g. ``"[0, 1]"``), or None
            to leave ``gpus`` untouched.

    Raises:
        ValueError: if the (decoded) device spec is not a str, int or list.
    """
    if device is not None:
        if isinstance(device, str):
            # A string spec is JSON, e.g. "1" or "[0, 1]".
            device = json.loads(device)
        # FIX: the signature allows int devices, but the original check
        # rejected them (and None) with a ValueError.
        if not isinstance(device, (str, int, list)):
            raise ValueError(f"Device is not valid: {device}")
        trainer_params["gpus"] = device

    logger: WandbLogger = WandbLogger(**logger_dict)
    model_checkpointer: ModelCheckpoint = ModelCheckpoint(**checkpoint_dict)
    # FIX: `callback_list += model_checkpointer` raised a TypeError because a
    # single ModelCheckpoint is not iterable; append it instead.
    callback_list.append(model_checkpointer)
    # FIX: trainer_params is a dict and must be expanded with ** — the original
    # `*trainer_params` would pass the dict KEYS as positional arguments.
    return Trainer(callbacks=callback_list, logger=logger, **trainer_params)
def get_wandb_logger(name: Optional[str] = None):
    """Return a WandbLogger for this project.

    Logging is offline unless ``WANDB_API_KEY`` is set; ``WANDB_PROJECT``
    overrides the default project name.
    """
    env = os.environ
    return WandbLogger(
        name=name,
        offline="WANDB_API_KEY" not in env,
        project=env.get("WANDB_PROJECT", "siim_isic_melanoma_classification"),
        log_model=True,
    )
# Experiment setup for the layer-classifier run.
# NOTE(review): fragment — `seed`, `opt`, `all_fruits` are defined above this
# excerpt, and the Trainer(...) call is cut off at the end.
random.seed(seed)

hparams = vars(opt)
hparams['git_id'] = get_current_git_hash()
hparams['batch_size'] = opt.batch_size
hparams['num_workers'] = 0
hparams['bands'] = len(util.get_wavelengths_for(opt.camera_type))
print("Hparams: %s" % hparams)

all_records = get_for_camera_type(
    get_for_fruit(all_fruits, hparams['fruit']), hparams['camera_type'])

model = LayerClassifierModule(hparams, all_records)
# The first positional WandbLogger argument is the run name (the git hash here).
logger = WandbLogger(hparams['git_id'], offline=not opt.online_logging,
                     save_dir=opt.log_path, project='deephs-layerclassifier')
# NOTE(review): EarlyStopping monitors 'val_loss' with mode='max' — for a loss
# this should almost certainly be mode='min'; as written, training stops once
# the loss fails to INCREASE. Confirm and fix.
early_stop_callback = EarlyStopping(monitor='val_loss', min_delta=0.00,
                                    patience=3, verbose=False, mode='max')
# Checkpointing correctly minimizes the validation loss.
checkpoint_callback = ModelCheckpoint(filepath='best.ckpt', save_top_k=1,
                                      verbose=True, monitor='val_loss', mode='min')
# Truncated here: the Trainer(...) call continues past this excerpt.
trainer = lightning.Trainer(max_epochs=opt.num_epochs,
from model import Network
from text_data import MemoryMapDataset
from labeler import Labeler, SpacySentenceTokenizer, SpacyWordTokenizer


def store_code(run):
    """Snapshot every *.py file next to this script into the run's code dir."""
    target = Path(run.dir) / "code" / "train"
    target.mkdir(parents=True, exist_ok=True)
    script_dir = (Path(__file__) / "..").resolve()
    for source_file in glob(str(script_dir / "*.py")):
        shutil.copy(source_file, target)


if __name__ == "__main__":
    wandb_logger = WandbLogger(project="nnsplit")

    # Let the model define its own CLI, defaulting the logger to W&B.
    parser = Network.get_parser()
    parser.set_defaults(logger=wandb_logger)
    hparams = parser.parse_args()

    if hparams.logger:
        store_code(wandb_logger.experiment)

    sentence_tokenizer = SpacySentenceTokenizer(
        "de_core_news_sm", lower_start_prob=0.7, remove_end_punct_prob=0.7
    )
    word_tokenizer = SpacyWordTokenizer("de_core_news_sm")
    labeler = Labeler([sentence_tokenizer, word_tokenizer])
# NOTE(review): fragment — the optimizer construction is truncated above this
# excerpt; these are its trailing keyword arguments. The unusual Adam-style
# betas (0.28, 0.93) are presumably tuned — confirm against the caller.
betas=(0.28, 0.93), weight_decay=0.01)
# NOTE(review): MultiStepLR expects `milestones` as a list of epoch indices
# (e.g. [25, 35]); passing the string '25,35' will not behave as intended —
# confirm and fix.
scheduler = optim.lr_scheduler.MultiStepLR(optimizer, '25,35', gamma=0.1)
return [optimizer], [scheduler]

def train_dataloader(self):
    # Materializes the dataset into a list and wraps it in a torch_geometric
    # DataLoader.
    return tg.loader.DataLoader(list(self.dataset), num_workers=self.num_workers,
                                pin_memory=False, shuffle=True)

def val_dataloader(self):
    # NOTE(review): validation reuses the SAME dataset as training, shuffled —
    # there is no held-out split; confirm this is intentional.
    return tg.loader.DataLoader(list(self.dataset), num_workers=self.num_workers,
                                pin_memory=False, shuffle=True)

if __name__ == '__main__':
    # Synchronous CUDA kernel launches for easier debugging.
    os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
    data_dir = os.path.join('GraphCoAttention', 'data')
    wandb.init()
    # NOTE(review): wandb_logger is created but never passed to the Trainer
    # below — confirm whether it should be wired in via `logger=`.
    wandb_logger = WandbLogger(project='flux', log_model='all')
    trainer = pl.Trainer(gpus=[0], max_epochs=2000, check_val_every_n_epoch=500,
                         accumulate_grad_batches=1)
    trainer.fit(Learner(data_dir))
# NOTE(review): fragment — the `if` guarding this first assignment is truncated
# above this excerpt.
    encoder = FixNineYearOldCodersJunk(encoder)
else:
    # Fall back to a pretrained gen-efficientnet backbone from torch hub.
    encoder = torch.hub.load(repo_or_dir='rwightman/gen-efficientnet-pytorch',
                             model=args.vision_model,
                             pretrained=args.pretrained)

# Re-stem the network for the datamodule's channel count.
encoder.conv_stem = nn.Conv2d(dm.num_channels, 32, kernel_size=3, stride=2,
                              padding=1, bias=False)
# Normalize the head to a 2048-d output: identity when it already is 2048-d,
# otherwise a bias-free linear projection.
if encoder.classifier.in_features == 2048:
    encoder.classifier = nn.Identity()
else:
    encoder.classifier = nn.Linear(encoder.classifier.in_features, 2048, bias=False)

model = BYOL(encoder=encoder, **args.__dict__)

save_dir = f'./{args.vision_model}-{args.input_size}'
Path(save_dir).mkdir(parents=True, exist_ok=True)
# NOTE(review): wandb_logger is not visibly passed to the Trainer below —
# confirm it is picked up via the argparse args or wired in elsewhere.
wandb_logger = WandbLogger(project=f"byol-{args.dataset}-breakout",
                           save_dir=save_dir, log_model=False)

# finetune in real-time
if args.old_eval:
    online_eval = OldSSLOnlineEvaluator(dataset=args.dataset, z_dim=2048,
                                        num_classes=dm.num_classes)
else:
    online_eval = SSLOnlineEvaluator(dataset=args.dataset, z_dim=2048,
                                     num_classes=dm.num_classes,
                                     name_classes=dm.name_classes)
image_viewer = CV2ModelImageSampler(nrow=32)

# DEFAULTS used by the Trainer
callbacks = [online_eval]
if args.debug:
    callbacks += [image_viewer]
trainer = pl.Trainer.from_argparse_args(args, max_steps=300000, callbacks=callbacks)
def train_dataloader(self):
    # Builds one torch_geometric DataLoader per dataset and returns them as a
    # dict keyed by dataset name (lightning's multi-loader training input).
    qm9_dataloader = tg.loader.DataLoader(list(self.qm9_dataset), batch_size=self.batch_size,
                                          num_workers=self.num_workers, pin_memory=False, shuffle=True)
    ddi_dataloader = tg.loader.DataLoader(list(self.ddi_dataset), batch_size=self.batch_size,
                                          num_workers=self.num_workers, pin_memory=False, shuffle=True)
    loaders = {"QM9": qm9_dataloader, 'DDI': ddi_dataloader}
    return loaders

def val_dataloader(self):
    # NOTE(review): validation reuses the SAME datasets as training, shuffled —
    # there is no held-out split; confirm this is intentional.
    qm9_dataloader = tg.loader.DataLoader(list(self.qm9_dataset), batch_size=self.batch_size,
                                          num_workers=self.num_workers, pin_memory=False, shuffle=True)
    ddi_dataloader = tg.loader.DataLoader(list(self.ddi_dataset), batch_size=self.batch_size,
                                          num_workers=self.num_workers, pin_memory=False, shuffle=True)
    # Unlike train_dataloader, validation returns a LIST of loaders (lightning
    # then calls validation_step with a dataloader_idx).
    # loaders = {"QM9": qm9_dataloader, 'DDI': ddi_dataloader}
    loaders = [qm9_dataloader, ddi_dataloader]
    return loaders

if __name__ == '__main__':
    # Synchronous CUDA kernel launches for easier debugging.
    os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
    data_dir = os.path.join('GraphCoAttention', 'data')
    wandb.init()
    # First positional argument is the run name ("jogarch").
    # NOTE(review): wandb_logger is created but never passed to the Trainer
    # below — confirm whether it should be wired in via `logger=`.
    wandb_logger = WandbLogger("jogarch", project='flux', log_model='all')
    trainer = pl.Trainer(gpus=[0], max_epochs=2000, check_val_every_n_epoch=500,
                         accumulate_grad_batches=1)
    trainer.fit(Learner(data_dir, bs=15, lr=0.001, n_cycles=40, hidden_dim=225, n_head=5))
# Autoencoder pre-training driver.
# NOTE(review): fragment — `hparams`, `opt`, `all_fruits` are initialized above
# this excerpt, and the final load_from_checkpoint(...) call is cut off.
hparams['batch_size'] = opt.batch_size
hparams['num_workers'] = 0
hparams['bands'] = len(util.get_wavelengths_for(opt.camera_type))
print("Hparams: %s" % hparams)

all_records = get_for_camera_type(
    get_for_fruit(all_fruits, hparams['fruit']), hparams['camera_type'])

# use only a part of the recordings
# NOTE(review): np.random.choice samples WITH replacement by default, so the 20
# selected records may contain duplicates — confirm whether replace=False was
# intended.
selected_records = np.random.choice(all_records, 20)
autoencoder = FalseColorAutoencoderModule(hparams, selected_records)

# The first positional WandbLogger argument is the run name (the git hash here).
logger = WandbLogger(hparams['git_id'], offline=not opt.online_logging,
                     save_dir=opt.log_path, project='deephs_pretrained_autoencoder')
# Keep only the single best checkpoint by validation loss.
checkpoint_callback = ModelCheckpoint(filepath='best.ckpt', save_top_k=1,
                                      verbose=True, monitor='val_loss', mode='min')

trainer = lightning.Trainer(max_epochs=200, gpus=-1, logger=logger,
                            checkpoint_callback=checkpoint_callback)
trainer.fit(autoencoder)

# Truncated here: the load_from_checkpoint(...) call continues past this excerpt.
best_autoencoder = FalseColorAutoencoderModule.load_from_checkpoint(
# NOTE(review): fragment — the opening of training_step (batch unpacking into
# x, y) is truncated above this excerpt.
    y_hat = self(x)
    loss = F.cross_entropy(y_hat, y)
    tensorboard_logs = {'train_loss': loss}
    return {'loss': loss, 'log': tensorboard_logs}

def validation_step(self, batch, batch_idx):
    # OPTIONAL
    x, y = batch
    y_hat = self(x)
    return {'val_loss': F.cross_entropy(y_hat, y)}

def validation_epoch_end(self, outputs):
    # OPTIONAL
    # Average the per-batch validation losses into the epoch-level metric.
    avg_loss = torch.stack([x['val_loss'] for x in outputs]).mean()
    tensorboard_logs = {'val_loss': avg_loss}
    return {'val_loss': avg_loss, 'log': tensorboard_logs}

# Module-level experiment driver; the run name is all config values joined by
# underscores.
experiments_names = '_'.join([str(v) for v in config.values()])
print(experiments_names)
wandb_logger = WandbLogger(name=experiments_names, project='tiny-imagenet')
model = TinyImagenetModel(BASE_DIR, df=val_df, config=config, augmentation_func=seq_cutout.augment)
trainer = pl.Trainer(max_epochs=50, gpus=1, logger=wandb_logger)
trainer.fit(model)