def train_loop(self, opt, lr_scheduler, loss_, x_var, y_var, batch_size=32):
    dataset = torch.utils.data.TensorDataset(x_var, y_var)
    train_data = torch.utils.data.DataLoader(
        dataset,
        batch_size=batch_size,
        sampler=ImbalancedDatasetSampler(dataset),
        shuffle=False)
    self.val_data = torch.utils.data.DataLoader(
        dataset,
        batch_size=batch_size,
        sampler=ImbalancedDatasetSampler(dataset),
        shuffle=False)

    if self.load_model:
        model_pickle = torch.load(self.model_save_string.format(self.epoch))
        self.load_state_dict(model_pickle['model_state_dict'])
        # Restore the optimizer from its own state dict, not the model's.
        opt.load_state_dict(model_pickle['optimizer_state_dict'])
        self.epoch = model_pickle['epoch']
        loss_val = model_pickle['loss']

    for epoch in range(self.epoch, 20000):
        for i, data in enumerate(train_data):
            x, y = data
            opt.zero_grad()
            acts = self.forward(x)
            loss = self.loss_fn(loss_, acts, y)
            loss.backward()
            opt.step()

        if epoch % 10 == 0:
            self.writer.add_histogram("acts", y, epoch)
            self.writer.add_histogram("preds", acts, epoch)
            self.writer.add_scalar('Loss', loss.item(), epoch)
            self.writer.add_scalar('Acc', self.accuracy(), epoch)
            torch.save(
                {
                    'epoch': epoch,
                    'model_state_dict': self.state_dict(),
                    'optimizer_state_dict': opt.state_dict(),
                    'loss': loss,
                }, self.model_save_string.format(epoch))
def run(self):
    # Loss and Optimizer
    criterion = nn.CrossEntropyLoss()
    _params = filter(lambda p: p.requires_grad, self.model.parameters())
    optimizer = self.opt.optimizer(_params,
                                   lr=self.opt.learning_rate,
                                   weight_decay=self.opt.l2reg)

    train_data_loader = DataLoader(
        dataset=self.trainset,
        batch_size=self.opt.batch_size,
        sampler=ImbalancedDatasetSampler(
            self.trainset, callback_get_label=self._get_label_callback))
    test_data_loader = DataLoader(dataset=self.testset,
                                  batch_size=self.opt.batch_size,
                                  shuffle=False)
    val_data_loader = DataLoader(dataset=self.valset,
                                 batch_size=self.opt.batch_size,
                                 shuffle=False)

    self._reset_params()
    best_model_path = self._train(criterion, optimizer, train_data_loader,
                                  val_data_loader)
    self.model.load_state_dict(torch.load(best_model_path))
    self.model.eval()
    test_acc, test_f1 = self._evaluate_acc_f1(test_data_loader, save_file=True)
    logger.info('>> test_acc: {:.4f}, test_f1: {:.4f}'.format(
        test_acc, test_f1))
def imagesToDataloader(path, batch_size=32, trans=None, balance=False, train=True):
    print('Mode: Training is {}'.format(train))
    # Define transforms
    if trans is None:
        trans = transforms.Compose([
            transforms.Resize(224),
            transforms.Grayscale(num_output_channels=3),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ])
    # Create an ImageFolder
    dataset = ImageFolder(path, transform=trans)
    classToIdx = dataset.class_to_idx
    # Create a dataloader
    if train and balance:
        sampler = ImbalancedDatasetSampler(dataset)
    elif train and not balance:
        sampler = RandomSampler(dataset)
    else:
        sampler = SequentialSampler(dataset)
    dataloader = DataLoader(dataset, batch_size=batch_size, sampler=sampler)
    print('Dataloader created')
    return dataloader, classToIdx
def __init__(self, batch_size=4, valid_size=0.2,
             dataset_resolution=DatasetResolution.MEDIUM):
    train_dir = dataset_resolution.get('train_dir')
    test_dir = dataset_resolution.get('test_dir')

    train_transform = transforms.Compose([
        transforms.Pad(dataset_resolution.get('padding')),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])
    test_transform = train_transform

    train_data = torchvision.datasets.ImageFolder(train_dir,
                                                  transform=train_transform)
    test_data = torchvision.datasets.ImageFolder(test_dir,
                                                 transform=test_transform)

    num_train = len(train_data)
    indices_train = list(range(num_train))
    np.random.shuffle(indices_train)
    split_tv = int(np.floor(valid_size * num_train))
    train_idx, valid_idx = indices_train[split_tv:], indices_train[:split_tv]

    train_sampler = ImbalancedDatasetSampler(train_data, indices=train_idx)
    valid_sampler = ImbalancedDatasetSampler(train_data, indices=valid_idx)

    self.train_loader = torch.utils.data.DataLoader(train_data,
                                                    batch_size=batch_size,
                                                    sampler=train_sampler,
                                                    num_workers=2)
    self.valid_loader = torch.utils.data.DataLoader(train_data,
                                                    batch_size=batch_size,
                                                    sampler=valid_sampler,
                                                    num_workers=2)
    self.test_loader = torch.utils.data.DataLoader(test_data,
                                                   batch_size=batch_size,
                                                   shuffle=True,
                                                   num_workers=2)
    self.classes = ('piste-cyclable', 'route', 'sentier', 'trottoir',
                    'voie-partagee')
def train_dataloader(self) -> DataLoader:
    sampler = ImbalancedDatasetSampler(
        self.train, callback_get_label=callback_get_label
    ) if self.balance_sampler else None
    shuffle = not self.balance_sampler
    return DataLoader(self.train,
                      sampler=sampler,
                      batch_size=self.batch_size,
                      shuffle=shuffle,
                      pin_memory=True,
                      num_workers=self.n_workers)
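# Hedged illustration (not part of the snippet above): `callback_get_label` is referenced but
# not defined in this example. With the (dataset, idx) callback signature used elsewhere in
# this collection, such a helper might look like the hypothetical sketch below, assuming the
# dataset yields (input, label) pairs:
def example_get_label(dataset, idx):
    _, label = dataset[idx]  # unpack one sample
    return int(label)        # return the integer class id for sample `idx`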
def run(self):
    # Loss and Optimizer
    criterion = nn.CrossEntropyLoss()
    _params = filter(lambda p: p.requires_grad, self.model.parameters())
    optimizer = self.opt.optimizer(_params,
                                   lr=self.opt.learning_rate,
                                   weight_decay=self.opt.l2reg)

    test_data_loader = DataLoader(dataset=self.testset,
                                  batch_size=self.opt.batch_size,
                                  shuffle=False)

    valset_len = len(self.trainset) // self.opt.cross_val_fold
    splitedsets = random_split(
        self.trainset,
        tuple([valset_len] * (self.opt.cross_val_fold - 1) + [
            len(self.trainset) - valset_len * (self.opt.cross_val_fold - 1)
        ]))

    all_test_acc, all_test_f1 = [], []
    for fid in range(self.opt.cross_val_fold):
        logger.info('fold : {}'.format(fid))
        logger.info('>' * 100)
        trainset = ConcatDataset(
            [x for i, x in enumerate(splitedsets) if i != fid])
        valset = splitedsets[fid]
        train_data_loader = DataLoader(
            dataset=trainset,
            batch_size=self.opt.batch_size,
            sampler=ImbalancedDatasetSampler(
                trainset, callback_get_label=self._get_label_callback))
        val_data_loader = DataLoader(dataset=valset,
                                     batch_size=self.opt.batch_size,
                                     shuffle=False)

        self._reset_params()
        best_model_path = self._train(criterion, optimizer, train_data_loader,
                                      val_data_loader)
        self.model.load_state_dict(torch.load(best_model_path))
        test_acc, test_f1 = self._evaluate_acc_f1(val_data_loader,
                                                  save_file=True,
                                                  fid=fid)
        all_test_acc.append(test_acc)
        all_test_f1.append(test_f1)
        logger.info('>> test_acc: {:.4f}, test_f1: {:.4f}'.format(
            test_acc, test_f1))

    mean_test_acc, mean_test_f1 = numpy.mean(all_test_acc), numpy.mean(all_test_f1)
    logger.info('>' * 100)
    logger.info('>>> mean_test_acc: {:.4f}, mean_test_f1: {:.4f}'.format(
        mean_test_acc, mean_test_f1))
def train_dataloader(self):
    train_set = SpectrogramDataset(
        df=self.df[self.df['fold'] != self.hparams.fold])
    if self.hparams.balanceSample:
        print('balance sample, it will take ~21min for train set.')
        sampler = ImbalancedDatasetSampler(
            train_set, callback_get_label=callback_get_label1)
        train_loader = torch.utils.data.DataLoader(
            train_set,
            sampler=sampler,
            batch_size=self.hparams.batch_size)
    else:
        print('normal sample.')
        train_loader = torch.utils.data.DataLoader(
            train_set,
            batch_size=self.hparams.batch_size,
            shuffle=True)
    return train_loader
def create_dataloader(path, imgsz, batch_size, stride, opt, hyp=None,
                      augment=False, cache=False, pad=0.0, rect=False,
                      rank=-1, world_size=1, workers=8, image_weights=False,
                      quad=False, prefix=''):
    # Make sure only the first process in DDP processes the dataset first,
    # so that the following processes can use the cache.
    with torch_distributed_zero_first(rank):
        dataset = LoadImagesAndLabels(
            path,
            imgsz,
            batch_size,
            augment=augment,  # augment images
            hyp=hyp,  # augmentation hyperparameters
            rect=rect,  # rectangular training
            cache_images=cache,
            single_cls=opt.single_cls,
            stride=int(stride),
            pad=pad,
            image_weights=image_weights,
            prefix=prefix)

    batch_size = min(batch_size, len(dataset))
    nw = min([
        os.cpu_count() // world_size,
        batch_size if batch_size > 1 else 0,
        workers
    ])  # number of workers
    # sampler = torch.utils.data.distributed.DistributedSampler(dataset) if rank != -1 else None
    sampler = ImbalancedDatasetSampler(dataset) if rank != -1 else None
    # Use torch.utils.data.DataLoader() if dataset properties will update during training,
    # else InfiniteDataLoader().
    loader = torch.utils.data.DataLoader if image_weights else InfiniteDataLoader
    dataloader = loader(dataset,
                        batch_size=batch_size,
                        num_workers=nw,
                        sampler=sampler,
                        pin_memory=True,
                        collate_fn=LoadImagesAndLabels.collate_fn4
                        if quad else LoadImagesAndLabels.collate_fn)
    return dataloader, dataset
def get_dataLoader(dataDir, batch_size=32, workers=1):
    trainDir = os.path.join(dataDir, 'train')
    valDir = os.path.join(dataDir, 'val')
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    train_dataset = datasets.ImageFolder(
        trainDir,
        transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ]))
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        sampler=ImbalancedDatasetSampler(train_dataset),
        batch_size=batch_size,
        num_workers=workers,
        pin_memory=True)

    val_loader = torch.utils.data.DataLoader(
        datasets.ImageFolder(
            valDir,
            transforms.Compose([
                transforms.Resize(256),
                transforms.CenterCrop(224),
                transforms.ToTensor(),
                normalize,
            ])),
        batch_size=batch_size,
        shuffle=False,
        num_workers=workers,
        pin_memory=True)
    return train_loader, val_loader
with open(
        os.path.join(os.path.dirname(__file__), "config",
                     "pneumothorax.yaml")) as config_file:
    config = yaml.full_load(config_file)

train_transform = get_transforms(config["train"]["transform"])
val_transform = get_transforms(config["val"]["transform"])

train_ds = PneumothoraxClassificationTrainDataset(config["train"]["path"],
                                                  transform=train_transform)
val_ds = PneumothoraxClassificationTrainDataset(config["val"]["path"],
                                                transform=val_transform)

train_dl = torch.utils.data.DataLoader(
    train_ds,
    batch_size=config["batch_size"],
    num_workers=12,
    sampler=ImbalancedDatasetSampler(
        train_ds,
        callback_get_label=lambda ds, idx: int(ds.images_list.iloc[idx][1])),
)
val_dl = torch.utils.data.DataLoader(val_ds,
                                     batch_size=config["batch_size"],
                                     drop_last=True,
                                     num_workers=12)
print("Train: ", len(train_dl), " Val: ", len(val_dl))

trainer = Trainer(config, train_dl, val_dl)
trainer.train()
def main(argv):
    # ========== Flag handling ==========
    try:
        opts, args = getopt.getopt(sys.argv[1:], "c:d:hs:e:l:w:", [
            "decay=", "dropout=", "help", "save_dir_path=", "epoch=",
            "learning_rate=", "weights="
        ])
    except getopt.GetoptError as err:
        # print help information and exit:
        print(err)  # will print something like "option -a not recognized"
        # usage()
        sys.exit(2)

    decay = 0
    dropout_p = 0
    save_dir_path = ''.join([main_dir_path, '/'])
    epoch = 1
    lr = 1e-5
    weights = False
    for o, a in opts:
        if o in ("-c", "--decay"):
            decay = float(a)
            print("Decay: {}".format(decay))
        elif o in ("-d", "--dropout"):
            dropout_p = float(a)
        elif o in ("-e", "--epoch"):
            epoch = int(a)
        elif o in ("-h", "--help"):
            # usage()
            print('''
            \n-c, --decay : Weight decay
            \n-d, --dropout : Dropout probability <0, 1>
            \n-e, --epoch : Number of epochs
            \n-s, --save_dir_path : Path to save files
            \n-l, --learning_rate : Learning rate value
            \n-w, --weights : Balancing with weights (True) or using sampler (False)
            \n''')
            sys.exit()
        elif o in ("-s", "--save_dir_path"):
            save_dir_path = ''.join([save_dir_path, a, '/'])
        elif o in ("-l", "--learning_rate"):
            lr = float(a)
            print("Lr: {}".format(lr))
        elif o in ("-w", "--weights"):
            # bool(a) would be True for any non-empty string, so parse the flag explicitly.
            weights = a.lower() in ("true", "1")
            print("Weights: {}".format(weights))
        else:
            assert False, "unhandled option"
    # ==================================

    assert isinstance(device, torch.device)
    print('Running on: {}'.format(device))

    # ========== Path to the training dataset ==========
    if not os.path.exists(train_data_path):
        print("Provided path does not exist.")
        os.mkdir(train_data_path)
        print("New directory was created.")
    # Check that the training dataset exists.
    assert os.path.exists(train_data_path)
    # ============================================================

    # =================== Transform definitions ==================
    crop_size = 900
    # image_size = 224
    # random_crop_size = 224  # Output image size
    train_transform = Compose([
        CenterCrop(crop_size),
        Resize(image_size),
        ToTensor(),
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
    test_transform = Compose([
        CenterCrop(crop_size),
        Resize(image_size),
        ToTensor(),
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
    aug_transform_1 = Compose([
        CenterCrop(crop_size),
        RandomRotation(10),
        Resize(image_size),
        ToTensor(),
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
    aug_transform_2 = Compose([
        Resize(crop_size),
        RandomHorizontalFlip(p=0.5),
        RandomVerticalFlip(p=0.5),
        RandomRotation(30),
        Resize(image_size + 4),
        RandomCrop(random_crop_size),
        ToTensor(),
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
    aug_transform_3 = Compose([
        Resize(crop_size),
        RandomHorizontalFlip(p=0.5),
        RandomVerticalFlip(p=0.5),
        RandomRotation(180),
        Resize(image_size + 4),
        RandomCrop(random_crop_size),
        ToTensor(),
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
    aug_transform_4 = Compose([
        CenterCrop(crop_size),
        RandomRotation(5),
        ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.01),
        RandomHorizontalFlip(p=0.5),
        RandomVerticalFlip(p=0.5),
        Resize(image_size + 30),
        RandomCrop(random_crop_size),
        ToTensor(),
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
    # ============================================================

    # ================ Splitting the dataset into subsets ===============
    # dataset_do_testow = ImageFolder(train_data_path, transform=train_transform)
    train_dataset = ImageFolder(train_data_path, transform=aug_transform_3)
    validation_dataset = ImageFolder(validation_data_path, transform=test_transform)
    test_dataset = ImageFolder(test_data_path, transform=test_transform)

    # ************** For testing **************
    # ***** Setting the subset sizes *****
    # train_dataset_size = int(len(dataset_do_testow) * 0.8)  # 80%
    # val_dataset_size = int(len(dataset_do_testow) * 0.1)  # 10%
    # test_dataset_size = len(dataset_do_testow) - train_dataset_size - val_dataset_size  # 10%
    # train_dataset, validation_dataset, test_dataset = random_split(dataset_do_testow, [train_dataset_size, val_dataset_size, test_dataset_size])
    # ***************************************
    # ***************************************

    # ========== Sanity checks on the datasets ==========
    assert len(train_dataset) + len(validation_dataset) == 20269, \
        'The total number of images in the training and validation sets must equal 20269, but is: {}'.format(
            len(train_dataset) + len(validation_dataset))
    assert len(test_dataset) == 5062, \
        'The number of images in the test set must equal 5062, but is: {}'.format(
            len(test_dataset))
    print('Number of elements in the training set: {}'.format(len(train_dataset)))
    print('Number of elements in the validation set: {}'.format(len(validation_dataset)))
    print('Number of elements in the test set: {}'.format(len(test_dataset)))
    # ============================================================

    # =========== Defining the batches for network training ==========
    batch_size = 10
    # ***** Balanced dataset with weights *****
    if weights:
        train_gen = DataLoader(dataset=train_dataset, batch_size=batch_size,
                               shuffle=True, num_workers=10)
        val_gen = DataLoader(dataset=validation_dataset, batch_size=batch_size,
                             shuffle=False, num_workers=10)
        test_gen = DataLoader(dataset=test_dataset, batch_size=batch_size,
                              shuffle=False, num_workers=10)
    else:
        # ***** Balanced dataset with sampler *****
        train_sampler = ImbalancedDatasetSampler(train_dataset)
        validation_sampler = ImbalancedDatasetSampler(validation_dataset)
        train_gen = DataLoader(dataset=train_dataset, sampler=train_sampler,
                               batch_size=batch_size, num_workers=10)
        val_gen = DataLoader(dataset=validation_dataset, sampler=validation_sampler,
                             batch_size=batch_size, shuffle=False, num_workers=10)
        test_gen = DataLoader(dataset=test_dataset, batch_size=batch_size,
                              shuffle=False, num_workers=10)
    # ***** Not balanced dataset *****
    # train_gen = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True, num_workers=6)
    # val_gen = DataLoader(dataset=validation_dataset, batch_size=batch_size, shuffle=False, num_workers=7)
    # test_gen = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False, num_workers=7)

    print('Number of training batches: {}'.format(len(train_gen)))
    print('Number of validation batches: {}'.format(len(val_gen)))
    print('Number of test batches: {}'.format(len(test_gen)))
    # ============================================================

    # ============================================================
    # ================ Preparing the network models ================
    # ============================================================
    # Use the ResNet-101, ResNet-152 and Inception_v3 models from torchvision.models,
    # pre-trained on a large image dataset (ImageNet).
    net_resnet18 = models.resnet18(pretrained=True)
    # net_resnet34 = models.resnet34(pretrained=True)
    # net_resnet101 = models.resnet101(pretrained=True)
    # net_resnet152 = models.resnet152(pretrained=False)
    # net_inception_v3 = models.inception_v3(pretrained=True)
    net_resnext50 = models.resnext50_32x4d(pretrained=True)

    net = net_resnext50  # net_resnet18 # net_resnet34 # net_resnet101 net_resnet152 net_inception_v3

    # Freezing the network parameters is only needed when the model is pretrained.
    # When training from scratch, the parameters are not frozen.
    # Freeze the network parameters
    # for param in net.parameters():
    #     param.requires_grad = False

    # Modify the network so it returns a tensor with as many features as there are recognized classes.
    # The parameters of the newly constructed layer default to requires_grad=True.
    amend_resnet_classifier(net, num_classes=8, dropout_probability=dropout_p)

    # ***************** Network training ****************
    cudnn.benchmark = True
    print('Training the model: \n')
    print('Saving folder: ', save_dir_path)
    net.to(device)
    train_model(net,
                train_gen,
                val_gen,
                num_epochs=epoch,
                lr=lr,
                save_dir_path=save_dir_path,
                weight_decay=decay,
                weights=weights)
def main(args): device = torch.device( "cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu") n_gpu = torch.cuda.device_count() logger.info("device: {} n_gpu: {}, 16-bits training: {}".format( device, n_gpu, args.fp16)) if args.gradient_accumulation_steps < 1: raise ValueError( "Invalid gradient_accumulation_steps parameter: {}, should be >= 1" .format(args.gradient_accumulation_steps)) args.train_batch_size = args.train_batch_size // args.gradient_accumulation_steps random.seed(args.seed) np.random.seed(args.seed) torch.manual_seed(args.seed) if n_gpu > 0: torch.cuda.manual_seed_all(args.seed) if not args.do_train and not args.do_eval: raise ValueError( "At least one of `do_train` or `do_eval` must be True.") if not os.path.exists(args.output_dir): os.makedirs(args.output_dir) if args.do_train: logger.addHandler( logging.FileHandler(os.path.join(args.output_dir, "train.log"), 'w')) else: logger.addHandler( logging.FileHandler(os.path.join(args.output_dir, "eval.log"), 'w')) logger.info(args) processor = TC_Processor() label_list = processor.get_labels() id2label = {i: label for i, label in enumerate(label_list)} num_labels = len(label_list) tokenizer = BertTokenizer.from_pretrained(args.model, do_lower_case=args.do_lower_case) if args.do_train: train_examples = processor.get_train_examples(args.data_dir) train_features = convert_examples_to_features(train_examples, label_list, args.max_seq_length, tokenizer) if args.train_mode == 'sorted' or args.train_mode == 'random_sorted': train_features = sorted(train_features, key=lambda f: np.sum(f.input_mask)) else: random.shuffle(train_features) all_input_ids = torch.tensor([f.input_ids for f in train_features], dtype=torch.long) all_input_mask = torch.tensor([f.input_mask for f in train_features], dtype=torch.long) all_segment_ids = torch.tensor([f.segment_ids for f in train_features], dtype=torch.long) all_label_ids = torch.tensor([f.label_id for f in train_features], dtype=torch.long) if args.fp16: all_label_ids = all_label_ids.half() train_data = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, all_label_ids) # train_dataloader = DataLoader(train_data, batch_size=args.train_batch_size, drop_last=True) # print(type(call_get_label(train_data,3))) train_dataloader = DataLoader( train_data, sampler=ImbalancedDatasetSampler(train_data), batch_size=args.train_batch_size, drop_last=True) train_batches = [batch for batch in train_dataloader] eval_step = max(1, len(train_batches) // args.eval_per_epoch) num_train_optimization_steps = \ len(train_dataloader) // args.gradient_accumulation_steps * args.num_train_epochs logger.info("***** Training *****") logger.info(" Num examples = %d", len(train_examples)) logger.info(" Batch size = %d", args.train_batch_size) logger.info(" Num steps = %d", num_train_optimization_steps) best_result = None # lrs = [args.learning_rate] if args.learning_rate else \ # [1e-6, 2e-6, 3e-6, 5e-6, 1e-5, 2e-5, 3e-5, 5e-5] lr = 1e-6 # for lr in lrs: cache_dir = args.cache_dir if args.cache_dir else \ PYTORCH_PRETRAINED_BERT_CACHE model = BertForSequenceClassification.from_pretrained( args.model, cache_dir=cache_dir, num_labels=num_labels) if args.fp16: model.half() model.to(device) if n_gpu > 1: model = torch.nn.DataParallel(model) # Prepare optimizer param_optimizer = list(model.named_parameters()) no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight'] optimizer_grouped_parameters = [{ 'params': [ p for n, p in param_optimizer if not any(nd in n for nd in no_decay) ], 'weight_decay': 0.01 }, { 
'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0 }] if args.fp16: try: from apex.optimizers import FP16_Optimizer from apex.optimizers import FusedAdam except ImportError: raise ImportError( "Please install apex from https://www.github.com/nvidia/apex" "to use distributed and fp16 training.") optimizer = FusedAdam(optimizer_grouped_parameters, lr=lr, bias_correction=False, max_grad_norm=1.0) if args.loss_scale == 0: optimizer = FP16_Optimizer(optimizer, dynamic_loss_scale=True) else: optimizer = FP16_Optimizer(optimizer, static_loss_scale=args.loss_scale) else: optimizer = BertAdam(optimizer_grouped_parameters, lr=lr, warmup=args.warmup_proportion, t_total=num_train_optimization_steps) global_step = 0 nb_tr_steps = 0 nb_tr_examples = 0 tr_loss = 0 start_time = time.time() for epoch in range(int(args.num_train_epochs)): model.train() logger.info("Start epoch #{} (lr = {})...".format(epoch, lr)) if args.train_mode == 'random' or args.train_mode == 'random_sorted': random.shuffle(train_batches) for step, batch in enumerate(train_batches): batch = tuple(t.to(device) for t in batch) input_ids, input_mask, segment_ids, label_ids = batch logits = model(input_ids, segment_ids, input_mask, labels=None) # weights = [0.4, 1] # class_weights = torch.FloatTensor(weights).cuda() weight_CE = torch.DoubleTensor([ 2.3, 4.8, 1.2, 1.8, 3.4, 8, 7.6, 3.7, 34.6, 17.3, 10.1, 2.1, 1.2, 1.8 ]) loss_fct = CrossEntropyLoss(weight=weight_CE) loss = loss_fct(logits.view(-1, num_labels), label_ids.view(-1)) if n_gpu > 1: loss = loss.mean() if args.gradient_accumulation_steps > 1: loss = loss / args.gradient_accumulation_steps if args.fp16: optimizer.backward(loss) else: loss.backward() tr_loss += loss.item() nb_tr_examples += input_ids.size(0) nb_tr_steps += 1 if (step + 1) % args.gradient_accumulation_steps == 0: if args.fp16: lr_this_step = lr * \ warmup_linear(global_step/num_train_optimization_steps, args.warmup_proportion) for param_group in optimizer.param_groups: param_group['lr'] = lr_this_step optimizer.step() optimizer.zero_grad() global_step += 1 if (step + 1) % eval_step == 0: logger.info( 'Epoch: {}, Step: {} / {}, used_time = {:.2f}s, loss = {:.6f}' .format(epoch, step + 1, len(train_dataloader), time.time() - start_time, tr_loss / nb_tr_steps)) model_to_save = model.module if hasattr(model, 'module') else model output_model_file = os.path.join( args.output_dir, 'epoch' + str(epoch) + '_' + WEIGHTS_NAME) output_config_file = os.path.join(args.output_dir, CONFIG_NAME) torch.save(model_to_save.state_dict(), output_model_file) model_to_save.config.to_json_file(output_config_file) tokenizer.save_vocabulary(args.output_dir) if args.do_test: eval_examples = processor.get_test_examples(args.data_dir) eval_features = convert_examples_to_features(eval_examples, label_list, args.max_seq_length, tokenizer) logger.info("***** Test *****") logger.info(" Num examples = %d", len(eval_examples)) logger.info(" Batch size = %d", args.eval_batch_size) all_input_ids = torch.tensor([f.input_ids for f in eval_features], dtype=torch.long) all_input_mask = torch.tensor([f.input_mask for f in eval_features], dtype=torch.long) all_segment_ids = torch.tensor([f.segment_ids for f in eval_features], dtype=torch.long) eval_data = TensorDataset(all_input_ids, all_input_mask, all_segment_ids) eval_dataloader = DataLoader(eval_data, batch_size=args.eval_batch_size) model = BertForSequenceClassification.from_pretrained( args.output_dir, num_labels=num_labels) if args.fp16: model.half() 
model.to(device) preds = evaluate(model, device, eval_dataloader) pred_file = os.path.join(args.output_dir, PRED_FILE) with open(pred_file, "w") as f_out: f_out.write("index\tprediction\n") for i, pred in enumerate(preds): f_out.write("%d\t%s\n" % (i, id2label[pred]))
def train_model(config, wandb): seed_everything(config.seed) device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") model_module = getattr(import_module("model"), config.model) model = model_module(num_classes=18).to(device) #model = torch.nn.DataParallel(model) ######## DataSet transform = DataAugmentation(type=config.transform) #center_384_1 dataset = MaskDataset(config.data_dir, transform=transform) len_valid_set = int(config.data_ratio * len(dataset)) len_train_set = len(dataset) - len_valid_set dataloaders, batch_num = {}, {} train_dataset, valid_dataset = torch.utils.data.random_split( dataset, [len_train_set, len_valid_set]) if config.random_split == 0: print("tbd") sampler = None if config.sampler == 'ImbalancedDatasetSampler': sampler = ImbalancedDatasetSampler(train_dataset) use_cuda = torch.cuda.is_available() dataloaders['train'] = torch.utils.data.DataLoader( train_dataset, batch_size=config.batch_size, sampler=sampler, shuffle=False, num_workers=4, pin_memory=use_cuda) dataloaders['valid'] = torch.utils.data.DataLoader( valid_dataset, batch_size=config.batch_size, shuffle=False, num_workers=4, pin_memory=use_cuda) batch_num['train'], batch_num['valid'] = len(dataloaders['train']), len( dataloaders['valid']) #Loss criterion = create_criterion(config.criterion) #Optimizer optimizer = optim.SGD(model.parameters(), lr=config.lr, momentum=0.9) if config.optim == "AdamP": optimizer = AdamP(model.parameters(), lr=config.lr, betas=(0.9, 0.999), weight_decay=config.weight_decay) elif config.optim == "AdamW": optimizer = optim.AdamW(model.parameters(), lr=config.lr, weight_decay=config.weight_decay) #Scheduler # Decay LR by a factor of 0.1 every 7 epochs #exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1) scheduler = lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.9) if config.lr_scheduler == "cosine": print('cosine') Q = math.floor(len(train_dataset) / config.batch_size + 1) * config.epochs / 7 scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=Q) #ConsineAnnealingWarmRestarts since = time.time() low_train = 0 best_model_wts = copy.deepcopy(model.state_dict()) best_acc = 0.0 train_loss, train_acc, valid_loss, valid_acc = [], [], [], [] num_epochs = config.epochs for epoch in range(num_epochs): print('Epoch {}/{}'.format(epoch, num_epochs - 1)) print('-' * 10) # Each epoch has a training and validation phase for phase in ['train', 'valid']: if phase == 'train': model.train() # Set model to training mode else: model.eval() # Set model to evaluate mode running_loss, running_corrects, num_cnt = 0.0, 0, 0 runnnig_f1 = 0 # Iterate over data. 
idx = 0 for inputs, labels in dataloaders[phase]: idx += 1 inputs = inputs.to(device) labels = labels.to(device) # zero the parameter gradients optimizer.zero_grad() # forward # track history if only in train with torch.set_grad_enabled(phase == 'train'): outputs = model(inputs) _, preds = torch.max(outputs, 1) loss = criterion(outputs, labels) # backward + optimize only if in training phase if phase == 'train': loss.backward() optimizer.step() else: runnnig_f1 += f1_score(labels.data.detach().cpu(), preds.detach().cpu(), average='macro') # statistics val_loss = loss.item() * inputs.size(0) running_loss += loss.item() * inputs.size(0) running_corrects += torch.sum(preds == labels.data) num_cnt += len(labels) if idx % 100 == 0: _loss = loss.item() / config.batch_size print( f"Epoch[{epoch}/{config.epochs}]({idx}/{batch_num[phase]}) || " f"{phase} loss {_loss:4.4} ") if phase == 'train': scheduler.step() epoch_loss = float(running_loss / num_cnt) epoch_acc = float( (running_corrects.double() / num_cnt).cpu() * 100) epoch_f1 = float(runnnig_f1 / num_cnt) if phase == 'train': train_loss.append(epoch_loss) train_acc.append(epoch_acc) if config.wandb: wandb.log({"Train acc": epoch_acc}) else: valid_loss.append(epoch_loss) valid_acc.append(epoch_acc) if config.wandb: wandb.log({"Valid acc": epoch_acc}) wandb.log({"F1 Score": epoch_f1}) print('{} Loss: {:.2f} Acc: {:.1f} f1 :{:.3f}'.format( phase, epoch_loss, epoch_acc, epoch_f1)) # deep copy the model if phase == 'valid': if epoch_acc > best_acc: best_idx = epoch best_acc = epoch_acc best_model_wts = copy.deepcopy(model.state_dict()) print('==> best model saved - %d / %.1f' % (best_idx, best_acc)) low_train = 0 elif epoch_acc < best_acc: print('==> model finish') low_train += 1 if low_train > 0 and epoch > 4: break if phase == 'valid': if epoch_acc < 80: print('Stop valid is so low') break time_elapsed = time.time() - since print('Training complete in {:.0f}m {:.0f}s'.format( time_elapsed // 60, time_elapsed % 60)) print('Best valid Acc: %d - %.1f' % (best_idx, best_acc)) # load best model weights model.load_state_dict(best_model_wts) #torch.save(model.state_dict(), 'mask_model.pt') torch.save(model.state_dict(), config.name + '.pt') print('model saved') if config.wandb: wandb.finish() return model, best_idx, best_acc, train_loss, train_acc, valid_loss, valid_acc
def main(): parser = argparse.ArgumentParser(description='Image Classification.') parser.add_argument('--model-name', type=str, default='resnet50') parser.add_argument('--checkpoint-path', type=str, default='./checkpoint/NIH/', help='Path to save checkpoint, only the model with highest top1 acc will be saved,' 'And the records will also be writen in the folder') parser.add_argument('--batch-size', type=int, default=128, help='Batch size') parser.add_argument('--lr', type=float, default=1e-3, help='Initial learning rate') parser.add_argument('--epoch', type=int, default=200, help='Maximum training epoch') parser.add_argument('--start-epoch', type=int, default=0, help='Start training epoch') parser.add_argument('--root-dir', type=str, default='./dataset/NIH/images/', help='path to the image folder') parser.add_argument('--train_ann_file', default='./dataset/NIH/NIH_train.csv', type=str, help='path to csvfile') parser.add_argument('--val_ann_file', default='./dataset/NIH/NIH_val.csv', type=str, help='path to csvfile') # parser.add_argument('--test-dir', type=str, default='xxx/test', # help='path to the train folder, each class has a single folder') parser.add_argument('--cos', type=bool, default=False, help='Use cos learning rate sheduler') parser.add_argument('--schedule', default=[50, 100, 150], nargs='*', type=int, help='learning rate schedule (when to drop lr by 10x)') parser.add_argument('--pretrained', type=str, default='Resume', help='Load which pretrained model, ' 'None : Do not load any weight, random initialize' 'Imagenet : official Imagenet pretrained model,' 'MoCo : Transfer model from Moco, path in $transfer-resume$' 'Transfer : Transfer model from Supervised pretrained, path in $transfer-resume$' 'Resume : Load checkpoint for corrupted training process, path in $resume$') parser.add_argument('--transfer-resume', type=str, default='', help='Path to load transfering pretrained model') parser.add_argument('--resume', type=str, default='', help='Path to resume a checkpoint') parser.add_argument('--num-class', type=int, default=15, help='Number of class for the classification') parser.add_argument('--PRINT-INTERVAL', type=int, default=3, help='Number of batch to print the loss') parser.add_argument('--save-epoch', default=150, type=int) args = parser.parse_args() device = torch.device("cuda" if torch.cuda.is_available() else "cpu") print("Device {}".format(device)) # Create checkpoint file if os.path.exists(args.checkpoint_path) == False: os.makedirs(args.checkpoint_path) normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) test_trans = transforms.Compose( [ transforms.Resize(224), transforms.CenterCrop(224), transforms.ToTensor(), normalize ] ) train_trans = transforms.Compose( [transforms.RandomResizedCrop(224), transforms.RandomHorizontalFlip(), auto.ImageNetPolicy(), transforms.ToTensor(), normalize ] ) trainset = NIHDataset(root=args.root_dir, ann_file=args.train_ann_file, transforms=train_trans) categoriesid = trainset.get_categoriesid() valset = NIHDataset(root=args.root_dir, ann_file=args.val_ann_file, transforms=test_trans, categoriesid=categoriesid) def call_back_label(dataset, idx): return dataset.get_label(idx) train_loader = DataLoader(trainset, batch_size=args.batch_size, shuffle=False, sampler=ImbalancedDatasetSampler(trainset, callback_get_label=call_back_label), num_workers=16) val_loader = DataLoader(valset, batch_size=args.batch_size, num_workers=16) # test_loader = 
DataLoader(testset,batch_size=args.batch_size,num_workers=8,pin_memory=True) print('dataset created\ttotal {}\ttrain {}\tval {}'.format(len(trainset) + len(valset), len(trainset), len(valset))) # Define Loss Function LOSS_FUNC = nn.CrossEntropyLoss().to(device) print(args.model_name) if args.pretrained == 'Imagenet': # ImageNet supervised pretrained model print('ImageNet supervised pretrained model') model = MODEL_DICT[args.model_name](num_classes=args.num_class, pretrained=True) for name, param in model.named_parameters(): param.requires_grad = True elif args.pretrained == 'MoCo': # load weight from transfering model from moco print('Load weight from transfering model from moco') model = MODEL_DICT[args.model_name](num_classes=args.num_class, pretrained=False) for name, param in model.named_parameters(): param.requires_grad = True if args.transfer_resume: if os.path.isfile(args.transfer_resume): print("=> loading checkpoint '{}'".format(args.transfer_resume)) checkpoint = torch.load(args.transfer_resume, map_location="cpu") # rename moco pre-trained keys state_dict = checkpoint['state_dict'] for k in list(state_dict.keys()): # retain only encoder_q up to before the embedding layer if k.startswith('module.encoder_q') and not k.startswith('module.encoder_q.fc'): # remove prefix state_dict[k[len("module.encoder_q."):]] = state_dict[k] # delete renamed or unused k del state_dict[k] msg = model.load_state_dict(state_dict, strict=False) assert set(msg.missing_keys) == {"fc.weight", "fc.bias"} print("=> loaded pre-trained model '{}'".format(args.transfer_resume)) else: print("=> no checkpoint found at '{}'".format(args.transfer_resume)) # init the fc layer model.fc.weight.data.normal_(mean=0.0, std=0.01) model.fc.bias.data.zero_() elif args.pretrained == 'Transfer': # load weight from transfering model from supervised pretraining model = MODEL_DICT[args.model_name](num_classes=args.num_class, pretrained=False) print('Load weight from transfering model from supervised pretraining') for name, param in model.named_parameters(): param.requires_grad = True if args.transfer_resume: if os.path.isfile(args.transfer_resume): print("=> loading checkpoint '{}'".format(args.transfer_resume)) checkpoint = torch.load(args.transfer_resume) state_dict = checkpoint['state_dict'] for k in list(state_dict.keys()): # retain only encoder_q up to before the embedding layer if k.startswith("module.fc."): del state_dict[k] elif k.startswith('module.'): # remove prefix state_dict[k[len("module."):]] = state_dict[k] # delete renamed or unused k del state_dict[k] msg = model.load_state_dict(state_dict, strict=False) assert set(msg.missing_keys) == {"fc.weight", "fc.bias"} print("=> loaded checkpoint '{}' (epoch {})" .format(args.transfer_resume, checkpoint['epoch'])) else: print("=> no checkpoint found at '{}'".format(args.transfer_resume)) # init the fc layer model.fc.weight.data.normal_(mean=0.0, std=0.01) model.fc.bias.data.zero_() else: # Random Initialize print('Random Initialize') model = MODEL_DICT[args.model_name](num_classes=args.num_class, pretrained=False) for name, param in model.named_parameters(): param.requires_grad = True print("Let's use", torch.cuda.device_count(), "GPUs!") model = nn.DataParallel(model) model = model.to(device) # Optimizer and learning rate scheduler optimizer = torch.optim.Adam(model.parameters(), lr=args.lr) if args.pretrained == 'Resume': # load weight from checkpoint print('Load weight from checkpoint {}'.format(args.resume)) load_resume(args, model, optimizer, args.resume) metric = 
[] for epoch in range(args.start_epoch, args.epoch): print('epoch: {}'.format(epoch)) adjust_learning_rate(optimizer, epoch, args) train_loss = train(model, train_loader, optimizer, args.PRINT_INTERVAL, epoch, args, LOSS_FUNC, device) acc1, acc5, confusion_matrix, val_loss, aucs = test(model, val_loader, args.num_class, LOSS_FUNC, device) metric.append(acc1) # Save train/val loss, acc1, acc5, confusion matrix(F1, recall, precision), AUCs record = { 'epoch': epoch + 1, 'train loss': train_loss, 'val loss': val_loss, 'acc1': acc1, 'acc5': acc5, 'confusion matrix': confusion_matrix, 'AUCs': aucs } torch.save(record, os.path.join(args.checkpoint_path, 'recordEpoch{}.pth.tar'.format(epoch))) # Only save the model with highest top1 acc if np.max(metric) == acc1: checkpoint = { 'epoch': epoch + 1, 'arch': args.model_name, 'state_dict': model.state_dict(), 'optimizer': optimizer.state_dict(), } torch.save(checkpoint, os.path.join(args.checkpoint_path, 'best.pth.tar')) print("Model Saved") if epoch != 0 and epoch % args.save_epoch == 0: filename = os.path.join(args.save_path, 'checkpoint_{:04d}.pth.tar'.format(epoch)) checkpoint={ 'epoch': epoch + 1, 'arch': args.model_name, 'state_dict': model.state_dict(), 'optimizer': optimizer.state_dict(), } torch.save(checkpoint, filename) print("Model Saved")
def makeLoaders(args, return_nclass=False, return_length=False,
                return_measurement=False, return_start=False, return_end=False):
    data = DataProcesser(args.data, datatable=False)

    # Select measurements and times, subset classes and split the dataset
    meas_var = data.detect_groups_times()['groups'] if args.measurement is None else args.measurement
    start_time = data.detect_groups_times()['times'][0] if args.startTime is None else args.startTime
    end_time = data.detect_groups_times()['times'][1] if args.endTime is None else args.endTime
    # Auto detect
    nclass = data.dataset[data.col_class].nunique() if args.nclass is None else args.nclass
    length = data.get_max_common_length() if args.length is None else args.length

    data.subset(sel_groups=meas_var, start_time=start_time, end_time=end_time)
    data.get_stats()
    data.split_sets()

    # Input preprocessing; this is done sequentially, on the fly, when the input is passed to the network
    average_perChannel = [data.stats['mu'][meas]['train'] for meas in meas_var]
    ls_transforms = transforms.Compose([
        RandomCrop(output_size=length, ignore_na_tails=True),
        Subtract(average_perChannel),
        ToTensor()])

    # Define the dataset objects that associate data to preprocessing and define the content of a batch.
    # A batch of myDataset contains: the trajectories, the trajectory identifiers and the trajectory class identifiers.
    data_train = myDataset(dataset=data.train_set, transform=ls_transforms)
    data_validation = myDataset(dataset=data.validation_set, transform=ls_transforms)
    if args.batch > len(data_train) or args.batch > len(data_validation):
        raise ValueError(
            'Batch size ({}) must be smaller than the number of trajectories in the training ({}) and the validation ({}) sets.'.format(
                args.batch, len(data_train), len(data_validation)))

    # Quick recap of the data content
    print('Channels order: {} \nTime range: ({}, {}) \nClasses: {}'.format(
        meas_var, start_time, end_time, list(data.dataset[data.col_class].unique())))
    nclass_data = len(list(data.dataset[data.col_class].unique()))
    if nclass != nclass_data:
        warnings.warn('The number of classes in the model output ({}) is not equal to the number of classes in the data ({}).'.format(
            nclass, nclass_data))

    if args.imba:
        print('Attempting to handle class imbalance.')
        train_loader = DataLoader(
            dataset=data_train,
            batch_size=args.batch,
            sampler=ImbalancedDatasetSampler(data_train, callback_get_label=get_label_forImbaSampler),
            num_workers=args.ncpuLoad,
            drop_last=True
        )
    else:
        train_loader = DataLoader(
            dataset=data_train,
            batch_size=args.batch,
            shuffle=True,
            num_workers=args.ncpuLoad,
            drop_last=True
        )
    validation_loader = DataLoader(
        dataset=data_validation,
        batch_size=args.batch,
        shuffle=False,
        num_workers=args.ncpuLoad,
        drop_last=True
    )

    out = {
        'train_loader': train_loader,
        'validation_loader': validation_loader
    }
    if return_measurement:
        out['measurement'] = meas_var
    if return_start:
        out['start_time'] = start_time
    if return_end:
        out['end_time'] = end_time
    if return_nclass:
        out['nclass'] = nclass
    if return_length:
        out['length'] = length
    return out
    transforms.RandomVerticalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
])
transform_val = transforms.Compose([
    transforms.Resize((input_size, input_size)),
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
])

train_dataset = datasets.ImageFolder(data_dir_base + '/train',
                                     transform=transform_train)
train_dataloader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=batch_size,
    sampler=ImbalancedDatasetSampler(train_dataset),
    num_workers=8)

val_dataset = datasets.ImageFolder(data_dir_base + '/val',
                                   transform=transform_val)
val_dataloader = torch.utils.data.DataLoader(
    val_dataset,
    batch_size=batch_size,
    sampler=ImbalancedDatasetSampler(val_dataset),
    num_workers=4)

test_dataset = datasets.ImageFolder(data_dir_base + '/test',
                                    transform=transform_val)
test_dataloader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=batch_size,
                         std=[0.229, 0.224, 0.225])
])
transforms_val = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

train_dataset = datasets.ImageFolder(
    root='/mnt/processed/private/msds2020cpt12/shopee-code-league/product-detection/train/train',
    transform=transforms_train)
sampler = ImbalancedDatasetSampler(train_dataset)
train_dataset = CutMix(train_dataset, num_class=42, beta=1.0, prob=0.5, num_mix=2)
# test_dataset = datasets.ImageFolder(
#     root='/mnt/processed/private/msds2020cpt12/shopee-code-league/product-detection/split_2/val',
#     transform=transforms_val)

train_dataloader = DataLoader(train_dataset,
                              batch_size=batch_size,
                              sampler=sampler,
                              shuffle=False)
# test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)
def train_dataloader(self):
    dataset = TensorDataset(self.traning_set[0], self.traning_set[1])
    data_loader = DataLoader(
        dataset,
        batch_size=128,
        sampler=ImbalancedDatasetSampler(
            dataset,
            callback_get_label=lambda dataset, idx: int(dataset[idx][1])))
    return data_loader
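# Hedged usage sketch (toy data and names, not from the original snippet): the lambda above
# reads the label as dataset[idx][1], so the second tensor handed to TensorDataset is expected
# to hold the integer class ids.
import torch
from torch.utils.data import TensorDataset
toy_features = torch.randn(1000, 20)                   # toy inputs
toy_labels = torch.randint(0, 2, (1000,))              # toy integer class labels
toy_dataset = TensorDataset(toy_features, toy_labels)  # toy_dataset[idx] -> (features, label)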
##################################################################################
######################################## Data and Loader ########################################
train_dataset, val_dataset, test_dataset = get_datasets(
    dataset_name, noise_rate, noise_type)
if (dataset_name == "Clothes1M"):
    train_dataset.image_keys = train_dataset.clean_train_keys
    train_dataset.labels = train_dataset.clean_train_labels

train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    num_workers=num_workers,
    drop_last=False,
    pin_memory=True,
    sampler=ImbalancedDatasetSampler(train_dataset,
                                     callback_get_label=_get_labels))
val_loader = torch.utils.data.DataLoader(dataset=val_dataset,
                                         batch_size=batch_size,
                                         num_workers=num_workers,
                                         drop_last=False,
                                         shuffle=False,
                                         pin_memory=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          num_workers=num_workers,
                                          drop_last=False,
                                          shuffle=False,
                                          pin_memory=True)
infer_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
def main(): logging.basicConfig(stream=sys.stdout, level=logging.INFO) print_config() # Setup directories dirs = setup_directories() # Setup torch device device, using_gpu = create_device("cuda") # Load and randomize images # HACKATON image and segmentation data hackathon_dir = os.path.join(dirs["data"], 'HACKATHON') map_fn = lambda x: (x[0], int(x[1])) with open(os.path.join(hackathon_dir, "train.txt"), 'r') as fp: train_info_hackathon = [ map_fn(entry.strip().split(',')) for entry in fp.readlines() ] image_dir = os.path.join(hackathon_dir, 'images', 'train') seg_dir = os.path.join(hackathon_dir, 'segmentations', 'train') _train_data_hackathon = get_data_from_info(image_dir, seg_dir, train_info_hackathon, dual_output=False) _train_data_hackathon = large_image_splitter(_train_data_hackathon, dirs["cache"]) copy_list = transform_and_copy(_train_data_hackathon, dirs['cache']) balance_training_data2(_train_data_hackathon, copy_list, seed=72) # PSUF data """psuf_dir = os.path.join(dirs["data"], 'psuf') with open(os.path.join(psuf_dir, "train.txt"), 'r') as fp: train_info = [entry.strip().split(',') for entry in fp.readlines()] image_dir = os.path.join(psuf_dir, 'images') train_data_psuf = get_data_from_info(image_dir, None, train_info)""" # Split data into train, validate and test train_split, test_data_hackathon = train_test_split(_train_data_hackathon, test_size=0.2, shuffle=True, random_state=42) train_data_hackathon, valid_data_hackathon = train_test_split( train_split, test_size=0.2, shuffle=True, random_state=43) #balance_training_data(train_data_hackathon, seed=72) #balance_training_data(valid_data_hackathon, seed=73) #balance_training_data(test_data_hackathon, seed=74) # Setup transforms # Crop foreground crop_foreground = CropForegroundd(keys=["image"], source_key="image", margin=(5, 5, 0), select_fn=lambda x: x != 0) # Crop Z crop_z = RelativeCropZd(keys=["image"], relative_z_roi=(0.07, 0.12)) # Window width and level (window center) WW, WL = 1500, -600 ct_window = CTWindowd(keys=["image"], width=WW, level=WL) # Random axis flip rand_x_flip = RandFlipd(keys=["image"], spatial_axis=0, prob=0.50) rand_y_flip = RandFlipd(keys=["image"], spatial_axis=1, prob=0.50) rand_z_flip = RandFlipd(keys=["image"], spatial_axis=2, prob=0.50) # Rand affine transform rand_affine = RandAffined(keys=["image"], prob=0.5, rotate_range=(0, 0, np.pi / 12), shear_range=(0.07, 0.07, 0.0), translate_range=(0, 0, 0), scale_range=(0.07, 0.07, 0.0), padding_mode="zeros") # Pad image to have hight at least 30 spatial_pad = SpatialPadd(keys=["image"], spatial_size=(-1, -1, 30)) resize = Resized(keys=["image"], spatial_size=(int(512 * 0.50), int(512 * 0.50), -1), mode="trilinear") # Apply Gaussian noise rand_gaussian_noise = RandGaussianNoised(keys=["image"], prob=0.25, mean=0.0, std=0.1) # Create transforms common_transform = Compose([ LoadImaged(keys=["image"]), ct_window, CTSegmentation(keys=["image"]), AddChanneld(keys=["image"]), resize, crop_foreground, crop_z, spatial_pad, ]) hackathon_train_transform = Compose([ common_transform, rand_x_flip, rand_y_flip, rand_z_flip, rand_affine, rand_gaussian_noise, ToTensord(keys=["image"]), ]).flatten() hackathon_valid_transfrom = Compose([ common_transform, #rand_x_flip, #rand_y_flip, #rand_z_flip, #rand_affine, ToTensord(keys=["image"]), ]).flatten() hackathon_test_transfrom = Compose([ common_transform, ToTensord(keys=["image"]), ]).flatten() psuf_transforms = Compose([ LoadImaged(keys=["image"]), AddChanneld(keys=["image"]), ToTensord(keys=["image"]), ]) # Setup data 
#set_determinism(seed=100) train_dataset = PersistentDataset(data=train_data_hackathon[:], transform=hackathon_train_transform, cache_dir=dirs["persistent"]) valid_dataset = PersistentDataset(data=valid_data_hackathon[:], transform=hackathon_valid_transfrom, cache_dir=dirs["persistent"]) test_dataset = PersistentDataset(data=test_data_hackathon[:], transform=hackathon_test_transfrom, cache_dir=dirs["persistent"]) train_loader = DataLoader( train_dataset, batch_size=4, #shuffle=True, pin_memory=using_gpu, num_workers=2, sampler=ImbalancedDatasetSampler( train_data_hackathon, callback_get_label=lambda x, i: x[i]['_label']), collate_fn=PadListDataCollate(Method.SYMMETRIC, NumpyPadMode.CONSTANT)) valid_loader = DataLoader( valid_dataset, batch_size=4, shuffle=False, pin_memory=using_gpu, num_workers=2, sampler=ImbalancedDatasetSampler( valid_data_hackathon, callback_get_label=lambda x, i: x[i]['_label']), collate_fn=PadListDataCollate(Method.SYMMETRIC, NumpyPadMode.CONSTANT)) test_loader = DataLoader(test_dataset, batch_size=4, shuffle=False, pin_memory=using_gpu, num_workers=2, collate_fn=PadListDataCollate( Method.SYMMETRIC, NumpyPadMode.CONSTANT)) # Setup network, loss function, optimizer and scheduler network = nets.DenseNet121(spatial_dims=3, in_channels=1, out_channels=1).to(device) # pos_weight for class imbalance _, n, p = calculate_class_imbalance(train_data_hackathon) pos_weight = torch.Tensor([n, p]).to(device) loss_function = torch.nn.BCEWithLogitsLoss(pos_weight) optimizer = torch.optim.Adam(network.parameters(), lr=1e-4, weight_decay=0) scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.95, last_epoch=-1) # Setup validator and trainer valid_post_transforms = Compose([ Activationsd(keys="pred", sigmoid=True), #Activationsd(keys="pred", softmax=True), ]) validator = Validator(device=device, val_data_loader=valid_loader, network=network, post_transform=valid_post_transforms, amp=using_gpu, non_blocking=using_gpu) trainer = Trainer(device=device, out_dir=dirs["out"], out_name="DenseNet121", max_epochs=120, validation_epoch=1, validation_interval=1, train_data_loader=train_loader, network=network, optimizer=optimizer, loss_function=loss_function, lr_scheduler=None, validator=validator, amp=using_gpu, non_blocking=using_gpu) """x_max, y_max, z_max, size_max = 0, 0, 0, 0 for data in valid_loader: image = data["image"] label = data["label"] print() print(len(data['image_transforms'])) #print(data['image_transforms']) print(label) shape = image.shape x_max = max(x_max, shape[-3]) y_max = max(y_max, shape[-2]) z_max = max(z_max, shape[-1]) size = int(image.nelement()*image.element_size()/1024/1024) size_max = max(size_max, size) print("shape:", shape, "size:", str(size)+"MB") #multi_slice_viewer(image[0, 0, :, :, :], str(label)) print(x_max, y_max, z_max, str(size_max)+"MB") exit()""" # Run trainer train_output = trainer.run() # Setup tester tester = Tester(device=device, test_data_loader=test_loader, load_dir=train_output, out_dir=dirs["out"], network=network, post_transform=valid_post_transforms, non_blocking=using_gpu, amp=using_gpu) # Run tester tester.run()