def get_datasets(
    self, stage: str, num_samples_per_class: int = None
) -> "OrderedDict[str, Dataset]":
    """Provides train/validation datasets from MNIST dataset."""
    num_samples_per_class = num_samples_per_class or 320
    datasets = OrderedDict()
    for mode in ("train", "valid"):
        dataset = MNIST(
            "./data",
            train=(mode == "train"),
            download=True,
            transform=self.get_transform(stage=stage, mode=mode),
        )
        if mode == "train":
            dataset = {
                "dataset": dataset,
                "sampler": BalanceClassSampler(
                    labels=dataset.targets, mode=num_samples_per_class
                ),
            }
        datasets[mode] = dataset
    return datasets
def train_fold(fold_number):
    train_dataset = DatasetRetriever(
        image_ids=df_folds[df_folds['fold'] != fold_number].index.values,
        labels=df_folds[df_folds['fold'] != fold_number].target.values,
        transforms=get_train_transforms(),
    )
    df_val = df_folds[(df_folds['fold'] == fold_number) & (df_folds['source'] == 'ISIC20')]
    validation_dataset = DatasetRetriever(
        image_ids=df_val.index.values,
        labels=df_val.target.values,
        transforms=get_valid_transforms(),
    )
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        sampler=BalanceClassSampler(labels=train_dataset.get_labels(), mode="downsampling"),
        batch_size=TrainGlobalConfig.batch_size,
        pin_memory=False,
        drop_last=True,
        num_workers=TrainGlobalConfig.num_workers,
    )
    val_loader = torch.utils.data.DataLoader(
        validation_dataset,
        batch_size=TrainGlobalConfig.batch_size,
        num_workers=TrainGlobalConfig.num_workers,
        shuffle=False,
        sampler=SequentialSampler(validation_dataset),
        pin_memory=False,
    )
    return train_loader
def _data_loader(self):
    train_trans = dlib.get_train_augmentations()
    test_trans = dlib.get_test_augmentations()
    df_train = pd.read_csv(self.args.train_file)
    df_test = pd.read_csv(self.args.test_file)
    if self.args.is_debug:
        df_train = df_train.sample(frac=1.0).iloc[:2 * self.args.batch_size, :]
        df_test = df_test.sample(frac=1.0).iloc[:2 * self.args.batch_size, :]
        print('### Debug mode is on ###')
    labels = list(df_train.target.values)
    sampler = BalanceClassSampler(labels, mode="upsampling")
    self.data['train_loader'] = DataLoader(
        dlib.DataBase(df_train, self.args.data_path, train_trans),
        batch_size=self.args.batch_size,
        sampler=sampler,
    )
    self.data['test_loader'] = DataLoader(
        dlib.DataBase(df_test, self.args.data_path, test_trans),
        batch_size=self.args.batch_size,
        shuffle=False,
        drop_last=False,
    )
    print('Data loading finished ...')
def run_training():
    device = torch.device('cuda:1')
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        sampler=BalanceClassSampler(labels=train_dataset.get_labels(), mode="downsampling"),
        batch_size=TrainGlobalConfig.batch_size,
        pin_memory=False,
        drop_last=True,
        num_workers=TrainGlobalConfig.num_workers,
    )
    val_loader = torch.utils.data.DataLoader(
        validation_dataset,
        batch_size=TrainGlobalConfig.batch_size,
        num_workers=TrainGlobalConfig.num_workers,
        shuffle=False,
        sampler=SequentialSampler(validation_dataset),
        pin_memory=False,
    )
    fitter = Fitter(model=net, device=device, config=TrainGlobalConfig)
    # fitter.load('../input/checkpoint2/best-checkpoint-001epoch.bin')
    fitter.load(f'{fitter.base_dir}/last-checkpoint.bin')
    fitter.fit(train_loader, val_loader)
def run_training():
    device = torch.device('cuda:0')
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        sampler=BalanceClassSampler(labels=train_dataset.get_labels(), mode="downsampling"),
        batch_size=global_config.GPU_BATCH_SIZE,
        pin_memory=False,
        drop_last=True,
        num_workers=global_config.num_workers,
    )
    val_loader = torch.utils.data.DataLoader(
        validation_dataset,
        batch_size=global_config.GPU_BATCH_SIZE,
        num_workers=global_config.num_workers,
        shuffle=False,
        sampler=SequentialSampler(validation_dataset),
        pin_memory=False,
    )
    print(f"\n>>> Total training examples: {len(train_loader) * global_config.GPU_BATCH_SIZE}")
    net = EfficientNet_Model(device=device, config=global_config, steps=len(train_loader))
    # net = Pretrained_Model(device=device, config=global_config, steps=len(train_loader))
    # Continue training from a checkpoint if one is provided
    if global_config.CONTINUE_TRAIN:
        net.load(global_config.CONTINUE_TRAIN)
        print(">>> Loaded pretrained model to continue training!")
    net.fit(train_loader, val_loader)
def train_dataloader(self):
    return DataLoader(
        dataset=self.train_dataset,
        batch_size=self.batch_size,
        num_workers=8,
        sampler=BalanceClassSampler(self.train_dataset.labels),
    )
def train_dataloader(self):
    transforms = get_train_augmentations(self.hparams.image_size)
    df = pd.read_csv(self.hparams.train_df)
    try:
        face_detector = self.hparams.face_detector
    except AttributeError:
        face_detector = None
    dataset = Dataset(df, self.hparams.path_root, transforms, face_detector=face_detector)
    if self.hparams.use_balance_sampler:
        labels = list(df.target.values)
        sampler = BalanceClassSampler(labels, mode="upsampling")
        shuffle = False
    else:
        sampler = None
        shuffle = True
    dataloader = torch.utils.data.DataLoader(
        dataset,
        batch_size=self.hparams.batch_size,
        num_workers=self.hparams.num_workers_train,
        sampler=sampler,
        shuffle=shuffle,
    )
    return dataloader
def get_datasets(
    self, stage: str, num_samples_per_class: int = None
) -> "OrderedDict[str, Dataset]":
    """Provides train/validation datasets from MNIST dataset."""
    num_samples_per_class = num_samples_per_class or 320
    datasets = super().get_datasets(stage=stage)
    datasets["train"] = {
        "dataset": datasets["train"],
        "sampler": BalanceClassSampler(
            labels=datasets["train"].targets, mode=num_samples_per_class
        ),
    }
    return datasets
def train_dataloader(self) -> DataLoader:
    train_dataset = DatasetRetriever(
        records=self.training_records,
        transforms=self.training_configs.transforms,
    )
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        sampler=BalanceClassSampler(labels=train_dataset.get_labels(), mode="downsampling"),
        batch_size=self.training_configs.batch_size,
        pin_memory=True,
        drop_last=True,
        num_workers=self.training_configs.num_workers,
    )
    return train_loader
def create_loaders(self, train_df, val_df):
    labels = [(x["mask_pxl"] == 0) * 1 for x in train_df]
    sampler = BalanceClassSampler(labels, mode="upsampling")
    train_loader = UtilsFactory.create_loader(
        train_df,
        open_fn=self.get_input_pair,
        batch_size=self.batch_size,
        num_workers=self.num_workers,
        shuffle=sampler is None,
        sampler=sampler,
    )
    labels = [(x["mask_pxl"] == 0) * 1 for x in val_df]
    sampler = BalanceClassSampler(labels, mode="upsampling")
    valid_loader = UtilsFactory.create_loader(
        val_df,
        open_fn=self.get_input_pair,
        batch_size=self.batch_size,
        num_workers=self.num_workers,
        shuffle=sampler is None,
        sampler=sampler,
    )
    loaders = collections.OrderedDict()
    loaders['train'] = train_loader
    loaders['valid'] = valid_loader
    return loaders
def _mp_fn(rank, flags):
    device = xm.xla_device()
    net.to(device)
    train_sampler = DistributedSamplerWrapper(
        sampler=BalanceClassSampler(labels=train_dataset.get_labels(), mode="downsampling"),
        num_replicas=xm.xrt_world_size(),
        rank=xm.get_ordinal(),
        shuffle=True,
    )
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=TrainGlobalConfig.batch_size,
        sampler=train_sampler,
        pin_memory=False,
        drop_last=True,
        num_workers=TrainGlobalConfig.num_workers,
    )
    if rank == 0:
        time.sleep(1)
    fitter = TPUFitter(model=net, device=device, config=TrainGlobalConfig)
    fitter.fit(train_loader)
    if os.path.isfile(args.val_file) and args.val_tune == 1:
        validation_sampler = torch.utils.data.distributed.DistributedSampler(
            validation_dataset,
            num_replicas=xm.xrt_world_size(),
            rank=xm.get_ordinal(),
            shuffle=False,
        )
        validation_loader = torch.utils.data.DataLoader(
            validation_dataset,
            batch_size=TrainGlobalConfig.batch_size,
            sampler=validation_sampler,
            pin_memory=False,
            drop_last=False,
            num_workers=TrainGlobalConfig.num_workers,
        )
        validation_tune_sampler = torch.utils.data.distributed.DistributedSampler(
            validation_tune_dataset,
            num_replicas=xm.xrt_world_size(),
            rank=xm.get_ordinal(),
            shuffle=True,
        )
        validation_tune_loader = torch.utils.data.DataLoader(
            validation_tune_dataset,
            batch_size=TrainGlobalConfig.batch_size,
            sampler=validation_tune_sampler,
            pin_memory=False,
            drop_last=False,
            num_workers=TrainGlobalConfig.num_workers,
        )
        fitter.run_tuning_and_inference(validation_tune_loader)
def _change_dl(k, dl, shuffle):
    old_dl = dl
    train_sampler = DistributedSamplerWrapper(
        sampler=BalanceClassSampler(labels=k.train_dataset.get_labels(), mode="downsampling"),
        num_replicas=8,  # xm.xrt_world_size()
        rank=0,  # xm.get_ordinal(); with it, each replica only gets 1/8 of the data
        shuffle=True,
    )
    train_loader = torch.utils.data.DataLoader(
        k.train_dataset,
        batch_size=k.config.batch_size,
        sampler=train_sampler,
        pin_memory=True,
        drop_last=True,
        num_workers=k.config.num_workers,
    )
    new_dl = train_loader
    return old_dl, new_dl, train_sampler
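The same wrapper pattern also applies to ordinary DistributedDataParallel runs, where the replica count and rank usually come from torch.distributed rather than being hard-coded. The following is a minimal sketch, assuming a process group is already initialized, that the dataset exposes a get_labels() method, and that these Catalyst import paths match the installed version; the helper name make_ddp_balanced_loader is made up for illustration:

import torch.distributed as dist
from torch.utils.data import DataLoader
from catalyst.data.sampler import BalanceClassSampler, DistributedSamplerWrapper

def make_ddp_balanced_loader(train_dataset, batch_size, num_workers=4):
    # Class-balanced index sampling over the whole dataset.
    base_sampler = BalanceClassSampler(
        labels=train_dataset.get_labels(), mode="downsampling"
    )
    # Shard the balanced indices so each process sees 1/world_size of them.
    sampler = DistributedSamplerWrapper(
        sampler=base_sampler,
        num_replicas=dist.get_world_size(),
        rank=dist.get_rank(),
        shuffle=True,
    )
    return DataLoader(
        train_dataset,
        batch_size=batch_size,
        sampler=sampler,
        num_workers=num_workers,
        drop_last=True,
        pin_memory=True,
    )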
def run_training(fitter):
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        sampler=BalanceClassSampler(labels=train_dataset.get_labels(), mode="downsampling"),
        batch_size=TrainGlobalConfig.batch_size,
        pin_memory=False,
        drop_last=True,
        num_workers=TrainGlobalConfig.num_workers,
    )
    val_loader = torch.utils.data.DataLoader(
        validation_dataset,
        batch_size=TrainGlobalConfig.batch_size,
        num_workers=TrainGlobalConfig.num_workers,
        shuffle=False,
        sampler=SequentialSampler(validation_dataset),
        pin_memory=False,
    )
    fitter.fit(train_loader, val_loader)
def test_balance_class_sampler():
    """Test for BalanceClassSampler."""
    bs = 32
    data = MNIST(os.getcwd(), train=False, download=True)
    for mode in ["downsampling", "upsampling", 100, 200, 500]:
        sampler = BalanceClassSampler(data.targets.cpu().numpy().tolist(), mode=mode)
        loader = DataLoader(data, sampler=sampler, batch_size=bs)
        y_list = []
        for _, y in loader:
            # assert len(x) == len(y) == bs
            y_list.extend(y.cpu().numpy().tolist())
        # prior: per-class counts in the MNIST test split
        if mode == "downsampling":
            mode = 892
        if mode == "upsampling":
            mode = 1135
        assert all(
            n == mode for n in Counter(y_list).values()
        ), f"Each class should contain {mode} instances"
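The per-class counts asserted above come from the MNIST test-split priors. The same behaviour can be checked without downloading any dataset by iterating the sampler over synthetic labels; this is an illustrative sketch only, and the helper name demo_balance_class_sampler is made up:

from collections import Counter
from catalyst.data.sampler import BalanceClassSampler

def demo_balance_class_sampler():
    labels = [0] * 100 + [1] * 10  # heavily imbalanced toy labels
    # Expected per-class counts: min count, max count, or the fixed int mode.
    for mode, expected in [("downsampling", 10), ("upsampling", 100), (50, 50)]:
        sampler = BalanceClassSampler(labels, mode=mode)
        counts = Counter(labels[i] for i in sampler)
        assert all(n == expected for n in counts.values()), counts
        print(mode, dict(counts))

demo_balance_class_sampler()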
def get_datasets(self, stage: str, *args, **kwargs):
    kwargs["transforms"] = get_transforms(kwargs["image_size"])
    datasets = get_datasets(*args, **kwargs)
    datasets["train"] = {
        "dataset": datasets["train"],
        "sampler": BalanceClassSampler(
            labels=[datasets["train"].get_label(i) for i in range(len(datasets["train"]))],
            mode="upsampling",
        ),
    }
    for key, dataset in datasets.items():
        if "infer" in key:
            datasets[key] = {
                "dataset": datasets[key],
                "shuffle": False,
                "drop_last": False,
            }
    # Infer stage
    if stage.startswith("infer"):
        datasets["infer_train"] = datasets["train"]
        del datasets["train"]
        datasets["infer_valid"] = datasets["valid"]
        del datasets["valid"]
    if "doe" in stage:
        datasets["infer_valid"] = datasets["valid"]
        del datasets["valid"]
    return datasets
def train_fold(fold_number):
    train_dataset = DatasetRetriever(
        image_ids=df_folds[df_folds['fold'] != fold_number].index.values,
        labels=df_folds[df_folds['fold'] != fold_number].target.values,
        transforms=get_train_transforms(),
    )
    df_val = df_folds[(df_folds['fold'] == fold_number) & (df_folds['source'] == 'ISIC20')]
    validation_dataset = DatasetRetriever(
        image_ids=df_val.index.values,
        labels=df_val.target.values,
        transforms=get_valid_transforms(),
    )
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        sampler=BalanceClassSampler(labels=train_dataset.get_labels(), mode="downsampling"),
        batch_size=TrainGlobalConfig.batch_size,
        pin_memory=False,
        drop_last=True,
        num_workers=TrainGlobalConfig.num_workers,
    )
    val_loader = torch.utils.data.DataLoader(
        validation_dataset,
        batch_size=TrainGlobalConfig.batch_size,
        num_workers=TrainGlobalConfig.num_workers,
        shuffle=False,
        sampler=SequentialSampler(validation_dataset),
        pin_memory=False,
    )
    fitter = Fitter(
        model=net,
        device=torch.device('cuda:0'),
        config=TrainGlobalConfig,
        folder=f'fold{fold_number}',
    )
    fitter.load(BASE_STATE_PATH)
    f_true, f_pred = fitter.fit(train_loader, val_loader)
    return f_true, f_pred
def run(fold, num_epochs=6): NUM_EPOCHS = num_epochs train_df = df.query(f'kfold_task2!={fold}').reset_index(drop=True) valid_df = df.query(f'kfold_task2=={fold}').reset_index(drop=True) #export train_ds = utils.create_loader(train_df.text.values, train_df.task2_encoded, bs=config.TRAIN_BATCH_SIZE, ret_dataset=True) train_dl = utils.create_loader(train_df.text.values, train_df.task2_encoded, bs=config.TRAIN_BATCH_SIZE, sampler=BalanceClassSampler( labels=train_ds.get_labels(), mode="upsampling")) valid_dl = utils.create_loader(valid_df.text.values, valid_df.task2_encoded, bs=config.VALID_BATCH_SIZE) #export modeller = model.HasocModel(len(le.classes_), drop=0.6) #export model_params = list(modeller.named_parameters()) #export # we don't want weight decay for these no_decay = ['bias', 'LayerNorm.weight', 'LayerNorm.bias'] optimizer_params = [ { 'params': [p for n, p in model_params if n not in no_decay], 'weight_decay': 0.001 }, # no weight decay should be applied { 'params': [p for n, p in model_params if n in no_decay], 'weight_decay': 0.0 } ] #export # lr = config.LR lr = 1e-4 #export optimizer = AdamW(optimizer_params, lr=lr) #export num_train_steps = int( len(df) / config.TRAIN_BATCH_SIZE * config.NUM_EPOCHS) #export scheduler = get_linear_schedule_with_warmup( optimizer=optimizer, num_warmup_steps=20, num_training_steps=num_train_steps - 20) #export # fit = engine.BertFitter(modeller, (train_dl, valid_dl), optimizer, nn.CrossEntropyLoss(), partial(f1_score, average='macro'), config.DEVICE, scheduler=scheduler, log_file='en_task2_log.txt') fit = engine.BertFitter(modeller, (train_dl, valid_dl), optimizer, utils.LabelSmoothingCrossEntropy(), partial(f1_score, average='macro'), config.DEVICE, scheduler=scheduler, log_file='en_task2_log.txt') #export fit.fit(NUM_EPOCHS, model_path=os.path.join(config.MODEL_PATH / f'en_task2_{fold}.pth'), show_graph=False) #export test_dl = utils.create_loader(test_df.text.values, lbls=[None] * len(test_df.text.values), bs=config.VALID_BATCH_SIZE, is_test=True) #export modeller = model.HasocModel(len(le.classes_)) modeller.load_state_dict( torch.load(config.MODEL_PATH / f'en_task2_{fold}.pth')) #export preds = engine.get_preds(test_dl.dataset, test_dl, modeller, config.DEVICE, ensemble_proba=True) np.save(os.path.join('..', 'outputs', f'submission_EN_B_{fold}.npy'), preds)
def main(args): args.labels: List[str] = ["Cover", "JMiPOD", "JUNIWARD", "UERD"] args.shared_indices: List[str] = ["image", "kind"] seed_everything(args.init_seed) index_train_test_images(args) if "efficientnet" in args.model_arch: model = EfficientNet.from_pretrained(args.model_arch, advprop=False, in_channels=3, num_classes=len(args.labels)) else: # "seresnet34", resnext50_32x4d" model = timm.create_model(args.model_arch, pretrained=True, num_classes=len(args.labels), in_chans=3, drop_rate=.5) # loading info for training training_configs = None validation_configs = None training_records = list() valid_records = list() if not args.inference_only: # configs validation_configs = BaseConfigs.from_file( file_path=args.valid_configs) training_configs = BaseConfigs.from_file(file_path=args.train_configs) training_configs.loss = LabelSmoothing(smoothing=.05) # split data skf = StratifiedKFold(n_splits=5) train_df, valid_df = split_data(args=args, splitter=skf) # training_records = process_images_to_records(args, df=train_df) valid_records = process_images_to_records(args, df=valid_df) # use lightning if args.use_lightning: model = BaseLightningModule(model, training_configs=training_configs, training_records=training_records, valid_configs=validation_configs, valid_records=valid_records, eval_metric_name=args.eval_metric, eval_metric_func=alaska_weighted_auc) model = training_lightning(args=args, model=model) model.freeze() # raw if args.gpus is not None: model = model.cuda() device = torch.device("cuda:0") if not args.inference_only and not args.use_lightning: train_dataset = DatasetRetriever( records=training_records, transforms=training_configs.transforms) train_loader = DataLoader( train_dataset, sampler=BalanceClassSampler(labels=train_dataset.get_labels(), mode="downsampling"), batch_size=training_configs.batch_size, pin_memory=False, drop_last=True, num_workers=training_configs.num_workers, ) validation_dataset = DatasetRetriever( records=valid_records, transforms=training_configs.transforms) val_loader = DataLoader( validation_dataset, batch_size=validation_configs.batch_size, num_workers=validation_configs.num_workers, shuffle=False, sampler=SequentialSampler(validation_dataset), pin_memory=False, ) fitter = Fitter(model=model, device=device, config=training_configs) if args.load_checkpoint and os.path.exists(args.checkpoint_path): fitter.load(args.checkpoint_path, model_weights_only=args.load_weights_only) # fitter.load(f"{fitter.base_dir}/best-checkpoint-024epoch.bin") fitter.fit(train_loader, val_loader) # Test submission = do_inference(args, model=model) if args.inference_proba: score = do_evaluate(args, submission) print(f"Inference TTA: {score:.04f}") file_path = os.path.join( args.cached_dir, f"proba__arch_{args.model_arch}__metric_{score:.4f}.parquet") submission.to_parquet(file_path) else: print(f"Inference Test:") image, kind = args.shared_indices df = submission.reset_index()[[image, args.labels[0]]] df.columns = ["Id", "Label"] df.set_index("Id", inplace=True) df["Label"] = 1. - df["Label"] df.to_csv("submission.csv", index=True) print( f"\nSubmission Stats:\n{df.describe()}\nSubmission:\n{df.head()}") return
def test_model_fn(device=torch.device("cpu")): #device = xm.xla_device(devkind='TPU') #device=torch.device("xla") logger.debug("Device used: %s", device) #k.run(dump_flag=True) # it seems it cannot save right #k.run(dump_flag=False) #k.peek_data() self = k assert self.validation_dataset is not None #assert self.learner is not None net = k.model assert net is not None net.to(device) param_optimizer = list(self.model.named_parameters()) no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight'] optimizer_grouped_parameters = [ {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay': 0.001}, {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0} ] #optimizer = AdamW(optimizer_grouped_parameters, lr=TrainGlobalConfig.lr*xm.xrt_world_size()) optimizer = AdamW(optimizer_grouped_parameters, lr=TrainGlobalConfig.lr*8) train_loader = torch.utils.data.DataLoader( self.train_dataset, batch_size=TrainGlobalConfig.batch_size, shuffle=False, # sampler is set, so shuffle here should be False sampler=BalanceClassSampler(labels=k.train_dataset.get_labels(), mode="downsampling"), pin_memory=False, drop_last=True, num_workers=TrainGlobalConfig.num_workers, ) validation_loader = torch.utils.data.DataLoader( self.validation_dataset, batch_size=TrainGlobalConfig.batch_size, # sampler=validation_sampler, pin_memory=False, drop_last=False, num_workers=TrainGlobalConfig.num_workers ) test_loader = torch.utils.data.DataLoader( self.test_dataset, batch_size=TrainGlobalConfig.batch_size, # sampler=test_sampler, pin_memory=False, drop_last=False, num_workers=TrainGlobalConfig.num_workers ) validation_tune_loader = torch.utils.data.DataLoader( self.validation_tune_dataset, batch_size=TrainGlobalConfig.batch_size, #sampler=validation_tune_sampler, pin_memory=False, drop_last=False, num_workers=TrainGlobalConfig.num_workers ) def validation(model, device, config, val_loader, criterion): model.eval() losses = AverageMeter() final_scores = RocAucMeter() t = time.time() for step, (inputs_masks, targets) in enumerate(val_loader): inputs=inputs_masks[0] attention_masks=inputs_masks[1] if config.verbose: if step % config.verbose_step == 0: logger.info( f'Valid Step {step}, loss: ' + \ f'{losses.avg:.5f}, final_score: {final_scores.avg:.5f}, mc_score: {final_scores.mc_avg:.5f}, ' + \ f'time: {(time.time() - t):.5f}' ) with torch.no_grad(): inputs = inputs.to(device, dtype=torch.long) attention_masks = attention_masks.to(device, dtype=torch.long) targets = targets.to(device, dtype=torch.float) outputs = model(inputs, attention_masks) loss = criterion(outputs, targets) batch_size = inputs.size(0) final_scores.update(targets, outputs) losses.update(loss.detach().item(), batch_size) def run_inference(model, device, config, test_loader): model.eval() result = {'id': [], 'toxic': []} t = time.time() for step, (inputs_masks, ids) in enumerate(test_loader): inputs=inputs_masks[0] attention_masks=inputs_masks[1] if config.verbose: if step % config.verbose_step == 0: logger.info(f'Prediction Step {step}, time: {(time.time() - t):.5f}') with torch.no_grad(): inputs = inputs.to(device, dtype=torch.long) attention_masks = attention_masks.to(device, dtype=torch.long) outputs = model(inputs, attention_masks) toxics = nn.functional.softmax(outputs, dim=1).data.cpu().numpy()[:,1] result['id'].extend(ids.cpu().numpy()) result['toxic'].extend(toxics) break # just test one batch return result def train_one_epoch(model, device, config, train_loader, criterion, 
optimizer): model.train() losses = AverageMeter() final_scores = RocAucMeter() t = time.time() for step, (inputs_masks, targets) in enumerate(train_loader): inputs=inputs_masks[0] attention_masks=inputs_masks[1] batch_size = inputs.size(0) if config.verbose: if step % config.verbose_step == 0: logger.debug( f'Train Step {step}, bs: {batch_size}, loss: ' + \ f"{losses.avg:.5f}, lr: {optimizer.param_groups[0]['lr']} final_score: {final_scores.avg:.5f}, mc_score: {final_scores.mc_avg:.5f}, " + \ f'time: {(time.time() - t):.5f}' ) inputs = inputs.to(device, dtype=torch.long) attention_masks = attention_masks.to(device, dtype=torch.long) targets = targets.to(device, dtype=torch.float) optimizer.zero_grad() outputs = model(inputs, attention_masks) loss = criterion(outputs, targets) final_scores.update(targets, outputs) losses.update(loss.detach().item(), batch_size) loss.backward() _check_grad(optimizer) #optimizer.step() xm.optimizer_step(optimizer, barrier=True) model.eval() #self.save('last-checkpoint.bin') return losses, final_scores def run_tuning_and_inference(net, device, TrainGlobalConfig, validation_loader, train_loader): for e in range(1): self.optimizer.param_groups[0]['lr'] = self.config.lr*8 losses, final_scores = train_one_epoch(net, device, TrainGlobalConfig, train_loader, TrainGlobalConfig.criterion, ) self.log(f'[RESULT]: Tune_Train. Epoch: {self.epoch}, loss: {losses.avg:.5f}, final_score: {final_scores.avg:.5f}, mc_score: {final_scores.mc_avg:.5f}, time: {(time.time() - t):.5f}') t = time.time() para_loader = pl.ParallelLoader(validation_loader, [self.device]) losses, final_scores = self.validation(para_loader.per_device_loader(self.device)) self.log(f'[RESULT]: Tune_Validation. Epoch: {self.epoch}, loss: {losses.avg:.5f}, final_score: {final_scores.avg:.5f}, mc_score: {final_scores.mc_avg:.5f}, time: {(time.time() - t):.5f}') run_inference(net, device, TrainGlobalConfig, validation_loader) #train_one_epoch(net, device, TrainGlobalConfig, train_loader, TrainGlobalConfig.criterion, optimizer) #losses, final_scores = validation(net, device, TrainGlobalConfig, validation_loader, TrainGlobalConfig.criterion) #logger.info(f"Val results: losses={losses}, final_scores={final_scores}") results = run_inference(net, device, TrainGlobalConfig, validation_loader) logger.info(f"Test done, result len %d", len(results))
def main(args: ArgumentParser): for dir_path in [args.output_dir, args.cached_dir, args.model_dir]: safe_mkdir(dir_path) seed_everything(args.init_seed) args = configure_arguments(args) index_train_test_images(args) eval_metric_func: Callable = alaska_weighted_auc # loading info for training # configs training_configs = None validation_configs = None if not args.inference_only: validation_configs = BaseConfigs.from_file( file_path=args.valid_configs) training_configs = BaseConfigs.from_file(file_path=args.train_configs) training_configs.loss = LabelSmoothing(smoothing=.05) # split data training_records = list() valid_records = list() if not args.inference_only: train_df, valid_df = split_train_valid_data( args=args, splitter=StratifiedKFold(n_splits=5), nr_fold=1) training_records = process_images_to_records(args, df=train_df) valid_records = process_images_to_records(args, df=valid_df) # model arch if "efficientnet" in args.model_arch: print(f"using {args.model_arch} from EfficientNet") model = EfficientNet.from_pretrained(args.model_arch, advprop=False, in_channels=3, num_classes=len(args.labels)) # model._fc = nn.Linear(in_features=1408, out_features=4, bias=True) else: # "seresnet34", resnext50_32x4d" model = timm.create_model(args.model_arch, pretrained=True, num_classes=len(args.labels), in_chans=3, drop_rate=.5) checkpoint: Optional = None checkpoint_exists: bool = os.path.exists(args.checkpoint_path) if args.use_lightning and args.export_to_lightning and checkpoint_exists: # export weights not trained by lightning before print( f"export weights to lightning from loaded checkpoint from: {args.checkpoint_path}" ) checkpoint = torch.load(args.checkpoint_path) model.load_state_dict(checkpoint["model_state_dict"]) # use lightning for training or inference if args.use_lightning: model = BaseLightningModule(model, training_configs=training_configs, training_records=training_records, valid_configs=validation_configs, valid_records=valid_records, eval_metric_name=args.eval_metric, eval_metric_func=eval_metric_func) checkpoint: Optional = None checkpoint_exists: bool = os.path.exists(args.checkpoint_path) if args.load_weights_only and not args.load_checkpoint: raise ValueError( f"Need to load checkpoint to load weights into models") if args.load_checkpoint and checkpoint_exists: print(f"loading checkpoint from: {args.checkpoint_path}") checkpoint = torch.load(args.checkpoint_path) if args.use_lightning: model.load_state_dict(checkpoint["state_dict"]) else: model.load_state_dict(checkpoint["model_state_dict"]) elif args.load_checkpoint and not checkpoint_exists: raise ValueError(f"checkpoint does not exist: {args.checkpoint_path}") if args.use_lightning and not args.inference_only: print(f"using lightning") model = training_lightning(args=args, model=model, checkpoint=checkpoint) # push model to computing if args.gpus is not None: model = model.cuda() device = torch.device("cuda:0") if not args.inference_only and not args.use_lightning: train_dataset = DatasetRetriever( records=training_records, transforms=training_configs.transforms) train_loader = DataLoader( train_dataset, sampler=BalanceClassSampler(labels=train_dataset.get_labels(), mode="downsampling"), batch_size=training_configs.batch_size, pin_memory=False, drop_last=True, num_workers=training_configs.num_workers, ) validation_dataset = DatasetRetriever( records=valid_records, transforms=training_configs.transforms) val_loader = DataLoader( validation_dataset, batch_size=validation_configs.batch_size, 
num_workers=validation_configs.num_workers, shuffle=False, sampler=SequentialSampler(validation_dataset), pin_memory=False, ) fitter = Fitter(model=model, device=device, config=training_configs, eval_metric_func=eval_metric_func) if args.load_checkpoint and checkpoint_exists: fitter.load(args.checkpoint_path, model_weights_only=args.load_weights_only) fitter.fit(train_loader, val_loader) # Test submission = do_inference(args, model=model, eval_metric_func=eval_metric_func) print(f"Finished Inference") if not args.inference_proba: df = generate_submission(args, submission) df.to_csv("submission.csv", index=True) return
def _mp_fn(rank, flags, k=k):
    device = xm.xla_device(devkind='TPU')
    logger.debug("%s used for xla_device" % device)
    net = k.model
    net.to(device)
    logger.debug("%s used for xla_device, to device done" % device)
    train_sampler = DistributedSamplerWrapper(
        sampler=BalanceClassSampler(labels=k.train_dataset.get_labels(), mode="downsampling"),
        num_replicas=xm.xrt_world_size(),
        rank=xm.get_ordinal(),
        shuffle=True,
    )
    train_loader = torch.utils.data.DataLoader(
        k.train_dataset,
        batch_size=TrainGlobalConfig.batch_size,
        sampler=train_sampler,
        pin_memory=False,
        drop_last=True,
        num_workers=TrainGlobalConfig.num_workers,
    )
    validation_sampler = torch.utils.data.distributed.DistributedSampler(
        k.validation_dataset,
        num_replicas=xm.xrt_world_size(),
        rank=xm.get_ordinal(),
        shuffle=False,
    )
    validation_loader = torch.utils.data.DataLoader(
        k.validation_dataset,
        batch_size=TrainGlobalConfig.batch_size,
        sampler=validation_sampler,
        pin_memory=False,
        drop_last=False,
        num_workers=TrainGlobalConfig.num_workers,
    )
    validation_tune_sampler = torch.utils.data.distributed.DistributedSampler(
        k.validation_tune_dataset,
        num_replicas=xm.xrt_world_size(),
        rank=xm.get_ordinal(),
        shuffle=True,
    )
    validation_tune_loader = torch.utils.data.DataLoader(
        k.validation_tune_dataset,
        batch_size=TrainGlobalConfig.batch_size,
        sampler=validation_tune_sampler,
        pin_memory=False,
        drop_last=False,
        num_workers=TrainGlobalConfig.num_workers,
    )
    test_sampler = torch.utils.data.distributed.DistributedSampler(
        k.test_dataset,
        num_replicas=xm.xrt_world_size(),
        rank=xm.get_ordinal(),
        shuffle=False,
    )
    test_loader = torch.utils.data.DataLoader(
        k.test_dataset,
        batch_size=TrainGlobalConfig.batch_size,
        sampler=test_sampler,
        pin_memory=False,
        drop_last=False,
        num_workers=TrainGlobalConfig.num_workers,
    )
    logger.debug("rank: %d. Will create TPU Fitter", rank)
    if rank == 0:
        time.sleep(1)
    fitter = TPUFitter(model=net, device=device, config=TrainGlobalConfig)
    fitter.fit(train_loader, validation_loader)
    fitter.run_tuning_and_inference(test_loader, validation_tune_loader)
# model = Hybrid().to(device)
model = torch.nn.DataParallel(model)
print(model)
# model.to(f'cuda:{model.device_ids[0]}')

train_ds = MelanomaDataset(train_df.image_name.values,
                           train_meta,
                           train_df.target.values,
                           dim=sz,
                           transforms=train_aug)
if balanced_sampler:
    print('Using Balanced Sampler....')
    train_loader = DataLoader(train_ds,
                              batch_size=batch_size,
                              sampler=BalanceClassSampler(labels=train_ds.get_labels(),
                                                          mode="upsampling"),
                              shuffle=False,
                              num_workers=4)
else:
    train_loader = DataLoader(train_ds,
                              batch_size=batch_size,
                              shuffle=True,
                              drop_last=True,
                              num_workers=4)

valid_ds = MelanomaDataset(valid_df.image_name.values,
                           valid_meta,
                           valid_df.target.values,
                           dim=sz,
                           transforms=val_aug)
def get_datasets( self, stage: str, datapath: str = None, in_csv: str = None, in_csv_train: str = None, in_csv_valid: str = None, in_csv_infer: str = None, train_folds: str = None, valid_folds: str = None, tag2class: str = None, class_column: str = None, tag_column: str = None, folds_seed: int = 42, n_folds: int = 5, one_hot_classes: bool = None, num_frames: int = None, num_segments: int = None, time_window: int = None, uniform_time_sample: bool = False, ): datasets = collections.OrderedDict() tag2class = json.load(open(tag2class)) \ if tag2class is not None \ else None df, df_train, df_valid, df_infer = read_csv_data( in_csv=in_csv, in_csv_train=in_csv_train, in_csv_valid=in_csv_valid, in_csv_infer=in_csv_infer, train_folds=train_folds, valid_folds=valid_folds, tag2class=tag2class, class_column=class_column, tag_column=tag_column, seed=folds_seed, n_folds=n_folds) df_valid = preprocess_valid_data(df_valid) open_fn = [ ScalarReader(input_key="class", output_key="targets", default_value=-1, dtype=np.int64) ] if one_hot_classes: open_fn.append( ScalarReader(input_key="class", output_key="targets_one_hot", default_value=-1, dtype=np.int64, one_hot_classes=one_hot_classes)) open_fn_val = open_fn.copy() open_fn.append( VideoImageReader(input_key="filepath", output_key="features", datapath=datapath, num_frames=num_frames, num_segments=num_segments, time_window=time_window, uniform_time_sample=uniform_time_sample)) open_fn_val.append( VideoImageReader(input_key="filepath", output_key="features", datapath=datapath, num_frames=num_frames, num_segments=num_segments, time_window=time_window, uniform_time_sample=uniform_time_sample, with_offset=True)) open_fn = ReaderCompose(readers=open_fn) open_fn_val = ReaderCompose(readers=open_fn_val) for source, mode in zip((df_train, df_valid, df_infer), ("train", "valid", "infer")): if len(source) > 0: dataset = ListDataset( source, open_fn=open_fn_val if mode == "valid" else open_fn, dict_transform=self.get_transforms(stage=stage, mode=mode), ) dataset_dict = {"dataset": dataset} if mode == "train": labels = [x["class"] for x in df_train] sampler = BalanceClassSampler(labels, mode="upsampling") dataset_dict['sampler'] = sampler datasets[mode] = dataset_dict return datasets
def get_train_test_valid_dataloaders(data_path, test_data_path, seed, image_size, batch_size): """ Utility function for the model. """ def build_data(data_path): content_list = [] labels_list = [] for image in tqdm(os.listdir(data_path)): if ".jpg" in image: content = cv2.imread(data_path + image) content_list.append(content) elif ".txt" in image: with open(data_path + image, "r") as f: labels = f.read() labels = np.array(labels.split(" "), dtype=int) labels[0] = 0 if labels[0] == 1 else 1 labels = np.roll(labels, -1) labels_list.append(labels) data = np.array([list(a) for a in zip(content_list, labels_list)]) return data train_data = build_data(data_path=data_path) test_data = build_data(data_path=test_data_path) train_data, valid_data = train_test_split(train_data, shuffle=True, test_size=0.1, random_state=seed) train_clf_labels = [a[-1] for a in train_data[:, 1]] transform = Compose( [ Resize(width=image_size, height=image_size), HorizontalFlip(p=0.4), # ShiftScaleRotate(p=0.3), MedianBlur(blur_limit=7, always_apply=False, p=0.3), IAAAdditiveGaussianNoise(scale=(0, 0.15 * 255), p=0.5), HueSaturationValue(hue_shift_limit=0.2, sat_shift_limit=0.2, val_shift_limit=0.2, p=0.4), RandomBrightnessContrast(brightness_limit=(-0.1, 0.1), contrast_limit=(-0.1, 0.1), p=0.5), # in this implementation imagenet normalization is used Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225], max_pixel_value=255.0, p=1.0), Cutout(p=0.4), ToTensorV2(p=1.0), ], p=1.0, bbox_params=A.BboxParams(format="pascal_voc"), ) test_transform = Compose( [ # only resize and normalization is used for testing # no TTA is implemented in this solution Resize(width=image_size, height=image_size), Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225], max_pixel_value=255.0, p=1.0), ToTensorV2(p=1.0), ], p=1.0, bbox_params=A.BboxParams(format="pascal_voc"), ) train_dataset = Dataset(train_data, transforms=transform) valid_dataset = Dataset(valid_data, transforms=transform) test_dataset = Dataset(test_data, transforms=test_transform) train_dataloader = DataLoader( train_dataset, # balanced sampler is used to minimize harmful effects of dataset not being fully balanced sampler=BalanceClassSampler(labels=train_clf_labels, mode="upsampling"), batch_size=batch_size, ) test_dataloader = DataLoader(test_dataset, sampler=SequentialSampler(test_dataset), batch_size=1) valid_dataloader = DataLoader(valid_dataset, sampler=SequentialSampler(valid_dataset), batch_size=batch_size) return train_dataloader, test_dataloader, valid_dataloader
def main(args):
    SEED = 16121985
    seed_everything(SEED)
    with open(args.config_path, "r") as f:
        config = json.load(f)
    set_cuda_device(config["GPU"])
    device = torch.device('cuda')
    base_dir = f'./ckpt/{config["exp_name"]}'
    os.makedirs(base_dir, exist_ok=True)
    shutil.copy(args.config_path, base_dir)
    train_dataset, valid_dataset, test_dataset = get_train_valid_datasets_jpeg(config)
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        sampler=BalanceClassSampler(labels=train_dataset.get_labels(), mode="downsampling"),
        batch_size=config["batch_size"],
        pin_memory=False,
        drop_last=True,
        num_workers=0,
    )
    val_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=64,
        num_workers=0,
        shuffle=False,
        sampler=SequentialSampler(valid_dataset),
        pin_memory=False,
    )
    test_loader = torch.utils.data.DataLoader(
        test_dataset,
        batch_size=64,
        num_workers=0,
        shuffle=False,
        sampler=SequentialSampler(test_dataset),
        pin_memory=False,
    )
    model = get_model(config["model_name"])
    model = model.to(device)
    iter_per_ep = len(train_loader)
    fitter = Fitter(model=model, device=device, config=config, n_iter_per_ep=iter_per_ep)
    if config["CURRICULUM"]:
        fitter.load(config["CL_path"])
        print('--> INITIAL <-- MODEL USED FOR TRAINING LOADED')
    fitter.fit(train_loader, val_loader, test_loader)
    fitter.load(f'{fitter.base_dir}/model-best.bin')
    print('--> TRAINED <-- MODEL WITH LOWEST LOSS LOADED')
    fitter.test(test_loader)
    fitter.load(f'{fitter.base_dir}/model-bestAcc.bin')
    print('--> TRAINED <-- MODEL WITH BEST ACCURACY LOADED')
    fitter.test(test_loader)
def prepare_loaders(*, mode: str, stage: str = None, n_workers: int = None, batch_size: int = None, datapath=None, in_csv=None, in_csv_train=None, in_csv_valid=None, in_csv_infer=None, train_folds=None, valid_folds=None, tag2class=None, class_column=None, tag_column=None, folds_seed=42, n_folds=5): loaders = collections.OrderedDict() df, df_train, df_valid, df_infer = parse_in_csvs( in_csv=in_csv, in_csv_train=in_csv_train, in_csv_valid=in_csv_valid, in_csv_infer=in_csv_infer, train_folds=train_folds, valid_folds=valid_folds, tag2class=tag2class, class_column=class_column, tag_column=tag_column, folds_seed=folds_seed, n_folds=n_folds) open_fn = [ ImageReader(input_key="filepath", output_key="image", datapath=datapath), ScalarReader(input_key="class", output_key="targets", default_value=-1, dtype=np.int64) ] open_fn = ReaderCompose(readers=open_fn) if len(df_train) > 0: labels = [x["class"] for x in df_train] sampler = BalanceClassSampler(labels, mode="upsampling") train_loader = UtilsFactory.create_loader( data_source=df_train, open_fn=open_fn, dict_transform=DataSource.prepare_transforms(mode="train", stage=stage), dataset_cache_prob=-1, batch_size=batch_size, workers=n_workers, shuffle=sampler is None, sampler=sampler) print("Train samples", len(train_loader) * batch_size) print("Train batches", len(train_loader)) loaders["train"] = train_loader if len(df_valid) > 0: sampler = None valid_loader = UtilsFactory.create_loader( data_source=df_valid, open_fn=open_fn, dict_transform=DataSource.prepare_transforms(mode="valid", stage=stage), dataset_cache_prob=-1, batch_size=batch_size, workers=n_workers, shuffle=False, sampler=sampler) print("Valid samples", len(valid_loader) * batch_size) print("Valid batches", len(valid_loader)) loaders["valid"] = valid_loader if len(df_infer) > 0: infer_loader = UtilsFactory.create_loader( data_source=df_infer, open_fn=open_fn, dict_transform=DataSource.prepare_transforms(mode="infer", stage=None), dataset_cache_prob=-1, batch_size=batch_size, workers=n_workers, shuffle=False, sampler=None) print("Infer samples", len(infer_loader) * batch_size) print("Infer batches", len(infer_loader)) loaders["infer"] = infer_loader return loaders
def get_datasets( self, stage: str, datapath: str = None, in_csv: str = None, in_csv_train: str = None, in_csv_valid: str = None, in_csv_infer: str = None, train_folds: str = None, valid_folds: str = None, tag2class: str = None, class_column: str = "class", input_column: str = "filepath", tag_column: str = None, folds_seed: int = 42, n_folds: int = 5, one_hot_classes: int = None, upsampling: bool = False, image_size: int = 224, crop_from_gray: bool = False, circle_crop: bool = False, normalize: bool = True, ben_preprocess: int = 10, hor_flip: float = 0.5, ver_flip: float = 0.2, rotate: int = 120, random_scale: int = 0.3, random_scale_p: int = 0.75, brightness: float = 0.2, contrast: float = 0.2, color_p: float = 0.5, ): datasets = collections.OrderedDict() tag2class = json.load(open(tag2class)) if tag2class is not None else None df, df_train, df_valid, df_infer = read_csv_data( in_csv=in_csv, in_csv_train=in_csv_train, in_csv_valid=in_csv_valid, in_csv_infer=in_csv_infer, train_folds=train_folds, valid_folds=valid_folds, tag2class=tag2class, class_column=class_column, tag_column=tag_column, seed=folds_seed, n_folds=n_folds ) for source, mode in zip( (df_train, df_valid, df_infer), ("train", "valid", "infer") ): if len(source) > 0: transforms = self.get_transforms( stage=stage, mode=mode, image_size=image_size, one_hot_classes=one_hot_classes, crop_from_gray=crop_from_gray, circle_crop=circle_crop, normalize=normalize, ben_preprocess=ben_preprocess, hor_flip=hor_flip, ver_flip=ver_flip, rotate=rotate, random_scale=random_scale, random_scale_p=random_scale_p, brightness=brightness, contrast=contrast, color_p=color_p, ) if mode == "valid": dataset = RetinopathyDatasetTrain(pd.DataFrame(source), "./data/train_images", transforms) else: dataset = RetinopathyDatasetTrain(pd.DataFrame(source), datapath, transforms) if upsampling is True and mode == "train": labels = [x[class_column] for x in source] sampler = BalanceClassSampler(labels, mode="upsampling") dataset = {"dataset": dataset, "sampler": sampler} datasets[mode] = dataset return datasets
train = MelanomaDataset(df=train_all_df.reset_index(drop=True),
                        config=config,
                        imfolder=config['data_folder'] + '/train',
                        split='train',
                        meta_features=meta_features)
val = MelanomaDataset(df=train_df.loc[val_idx].reset_index(drop=True),
                      config=config,
                      imfolder=config['data_folder'] + '/train',
                      split='val',
                      meta_features=meta_features)
if config.get('balanced_batches', False):
    # BalanceClassSampler takes the label list, not the dataset, as its first argument
    train_loader = DataLoader(dataset=train,
                              sampler=BalanceClassSampler(labels=list(train_all_df['target'])),
                              batch_size=config['batch_size'],
                              num_workers=2)
    print('balanced_batches')
else:
    train_loader = DataLoader(dataset=train,
                              batch_size=config['batch_size'],
                              shuffle=True,
                              num_workers=2)
val_loader = DataLoader(dataset=val, batch_size=16, shuffle=False, num_workers=2)
if config['test']:
def main(): args = parser.parse_args() if TrainGlobalConfig.is_train: dataset = [] for label, kind in enumerate(['Cover', 'JMiPOD', 'JUNIWARD', 'UERD']): for path in glob( '/data/kaggle/alaska2-image-steganalysis/Cover/*.jpg'): dataset.append({ 'kind': kind, 'image_name': path.split('/')[-1], 'label': label }) random.shuffle(dataset) dataset = pd.DataFrame(dataset) gkf = GroupKFold(n_splits=5) dataset.loc[:, 'fold'] = 0 for fold_number, (train_index, val_index) in enumerate( gkf.split(X=dataset.index, y=dataset['label'], groups=dataset['image_name'])): dataset.loc[dataset.iloc[val_index].index, 'fold'] = fold_number # -------------------- fold_number = 1 train_dataset = DatasetRetriever( kinds=dataset[dataset['fold'] != fold_number].kind.values, image_names=dataset[ dataset['fold'] != fold_number].image_name.values, labels=dataset[dataset['fold'] != fold_number].label.values, transforms=get_train_transforms(), ) validation_dataset = DatasetRetriever( kinds=dataset[dataset['fold'] == fold_number].kind.values, image_names=dataset[dataset['fold'] == fold_number].image_name.values, labels=dataset[dataset['fold'] == fold_number].label.values, transforms=get_valid_transforms(), ) # -------------------- train_loader = torch.utils.data.DataLoader( train_dataset, sampler=BalanceClassSampler(labels=train_dataset.get_labels(), mode="downsampling"), batch_size=TrainGlobalConfig.batch_size, pin_memory=True, drop_last=True, num_workers=TrainGlobalConfig.num_workers, ) val_loader = torch.utils.data.DataLoader( validation_dataset, batch_size=TrainGlobalConfig.batch_size, num_workers=TrainGlobalConfig.num_workers, shuffle=False, sampler=SequentialSampler(validation_dataset), pin_memory=True, ) # -------------------- device = torch.device("cuda" if torch.cuda.is_available() else "cpu") model = CustomModel() model = model.cuda() fitter = Fitter(model=model, device=device, config=TrainGlobalConfig) if args.resume is not None: fitter.load(args.resume) fitter.fit(train_loader, val_loader) else: test_dataset = DatasetSubmissionRetriever( image_names=np.array([ path.split('/')[-1] for path in glob( '/data/kaggle/alaska2-image-steganalysis/Test/*.jpg') ]), transforms=get_valid_transforms(), ) # -------------------- data_loader = DataLoader( test_dataset, batch_size=64, shuffle=False, num_workers=8, drop_last=False, pin_memory=True, ) # -------------------- model = CustomModel() model = model.cuda() model = nn.DataParallel(model) checkpoint = torch.load(args.resume) model.load_state_dict(checkpoint['model_state_dict']) model.eval() # -------------------- result = {'Id': [], 'Label': []} for step, (image_names, images) in enumerate(data_loader): print(step, end='\r') y_pred = model(images.cuda()) y_pred = 1 - nn.functional.softmax(y_pred, dim=1).data.cpu().numpy()[:, 0] result['Id'].extend(image_names) result['Label'].extend(y_pred) submission = pd.DataFrame(result) submission.to_csv('output_fold_{}.csv'.format(fold_number), index=False)
# optimizer = optim.Adam(model.parameters(), lr=lr)
optimizer = optim.AdamW(model.parameters(), lr=lr)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.9)

is_train = True
if is_train:
    xdl = DeeperForensicsDatasetNew(real_npys=train_real_paths_npy,
                                    fake_npys=train_fake_paths_npy,
                                    is_one_hot=True,
                                    transforms=None)
    train_loader = DataLoader(xdl,
                              batch_size=batch_size,
                              shuffle=False,
                              num_workers=4,
                              sampler=BalanceClassSampler(labels=xdl.get_labels(),
                                                          mode="downsampling"))
    # train_loader = DataLoader(xdl, batch_size=batch_size, shuffle=True, num_workers=4)
    train_dataset_len = len(xdl)

    xdl_eval = DeeperForensicsDatasetNew(real_npys=val_real_paths_npy,
                                         fake_npys=val_fake_paths_npy,
                                         is_one_hot=False,
                                         transforms=None,
                                         data_type='val')
    eval_loader = DataLoader(xdl_eval, batch_size=test_batch_size, shuffle=False, num_workers=4)
    eval_dataset_len = len(xdl_eval)
    print('train_dataset_len:', train_dataset_len, 'eval_dataset_len:', eval_dataset_len)