def main():
    # Data loading
    train_dataset = CustomDataset(root=config.root_train, annFile=config.annFile_train,
                                  transforms=config.train_transforms, catagory=config.CATEGORY_FILTER)
    val_dataset = CustomDataset(root=config.root_train, annFile=config.annFile_train,
                                transforms=config.val_transforms, catagory=config.CATEGORY_FILTER)
    train_loader = DataLoader(dataset=train_dataset, batch_size=16, num_workers=4,
                              pin_memory=True, shuffle=False, drop_last=True)
    val_loader = DataLoader(dataset=val_dataset, batch_size=16, num_workers=4,
                            pin_memory=True, shuffle=False, drop_last=True)

    # Model
    model = YoloV3(num_classes=config.C).to(device=config.DEVICE)
    optimizer = optim.Adam(model.parameters(), lr=config.LEARNING_RATE,
                           weight_decay=config.WEIGHT_DECAY)
    loss_function = YoloLoss()
    scaler = torch.cuda.amp.GradScaler()

    # Miscellaneous
    scaled_anchors = (torch.tensor(config.anchors) *
                      torch.tensor(config.Scale).unsqueeze(1).unsqueeze(1).repeat(1, 3, 2)).to(config.DEVICE)
    # writer = SummaryWriter()
    current_time = time.time()
    print("Train loader length:", len(train_loader))

    # Training loop
    model.train()
    for cycle, (x, y) in enumerate(train_loader):
        print("Current cycle:", cycle)
        delta_time, current_time = time_function(current_time)
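# NOTE: the training loop above calls time_function(), which is not defined in this
# snippet. A minimal sketch, assuming it returns (seconds_elapsed, new_timestamp):
import time

def time_function(previous_time):
    """Return the seconds elapsed since `previous_time` together with the current time."""
    now = time.time()
    return now - previous_time, now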
def main_worker(gpu, args):
    torch.cuda.set_device(gpu)
    device = torch.device('cuda')

    basic_transforms = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])

    unlabeled_dataset = CustomDataset(root=args.data, split='unlabeled', transform=basic_transforms)
    labeled_dataset = CustomDataset(root=args.data, split='train', transform=basic_transforms)

    # Wrap the datasets in loaders (kept in separate variables so the datasets are not shadowed).
    unlabeled_loader = torch.utils.data.DataLoader(unlabeled_dataset, batch_size=args.batch_size,
                                                   num_workers=args.workers)
    labeled_loader = torch.utils.data.DataLoader(labeled_dataset, batch_size=args.batch_size,
                                                 num_workers=args.workers)

    # Load pre-trained model from checkpoint
    model = ft_model()
    model.load_state_dict(torch.load(args.checkpoint_dir / args.checkpoint_file))
    model.eval()
    model.to(device)

    unlabeled_entropy = []
    labeled_entropy = []
    since = time.time()
    steps = 100

    with torch.no_grad():
        for i, batch in enumerate(unlabeled_loader):
            entropy = get_entropy(model, batch, device).tolist()
            unlabeled_entropy.extend(entropy)
            if i % steps == 0:
                print(i, sum(entropy) / len(entropy))
        for i, batch in enumerate(labeled_loader):
            entropy = get_entropy(model, batch, device).tolist()
            labeled_entropy.extend(entropy)
            if i % steps == 0:
                print(i, sum(entropy) / len(entropy))

    return unlabeled_entropy, labeled_entropy
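# NOTE: get_entropy() is defined elsewhere; a hedged sketch, assuming it returns the
# per-sample entropy of the softmax predictions (higher entropy = less confident model):
import torch
import torch.nn.functional as F

def get_entropy_sketch(model, batch, device):
    images = batch[0].to(device)                            # assumes batches of (images, labels)
    probs = F.softmax(model(images), dim=1)
    return -(probs * torch.log(probs + 1e-12)).sum(dim=1)   # shape: (batch_size,)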
def __init__(self, FLAGS):
    self.use_cuda = FLAGS.cuda and torch.cuda.is_available()
    self.gpu = FLAGS.gpu
    self.train = FLAGS.train
    self.batch_size = FLAGS.batch_size
    self.data_path = FLAGS.data_path
    self.learning_rate = FLAGS.learning_rate
    self.beta1 = FLAGS.beta_1
    self.beta2 = FLAGS.beta_2
    self.load_path = FLAGS.load_path
    self.epoch = 0
    self.end_epoch = FLAGS.end_epoch

    self.model = SRModel().double()
    if not self.train:
        if self.use_cuda:
            self.model.load_state_dict(
                torch.load(os.path.join('checkpoints', self.load_path)))
        else:
            self.model.load_state_dict(
                torch.load(os.path.join('checkpoints', self.load_path),
                           map_location='cpu'))
    if self.use_cuda:
        torch.cuda.set_device(self.gpu)
        self.model.cuda()

    self.optim = optim.Adam(list(self.model.parameters()),
                            lr=self.learning_rate,
                            betas=(self.beta1, self.beta2))

    date_time = datetime.now().strftime("%b%d_%H-%M-%S")
    self.save_folder = os.path.join('checkpoints', date_time)
    if (not os.path.exists(self.save_folder)) and self.train:
        os.makedirs(self.save_folder, exist_ok=True)

    if self.train:
        self.writer = SummaryWriter()
        settings = ''
        for arg in vars(FLAGS):
            settings += str(arg) + '=' + str(getattr(FLAGS, arg)) + ' '
        self.writer.add_text('Settings', settings)

    print("Loading data...")
    self.train_set = CustomDataset(root=self.data_path, train=True)
    # self.test_set = CustomDataset(root=self.data_path, train=False)
    self.loader = cycle(
        DataLoader(self.train_set,
                   batch_size=self.batch_size,
                   shuffle=True,
                   num_workers=0,
                   drop_last=True))
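# NOTE: because the DataLoader is wrapped in itertools.cycle, training code draws batches
# with next() instead of a for-loop; a hedged usage sketch (field layout assumed):
#     lr_img, hr_img = next(self.loader)
#     sr_img = self.model(lr_img)
#     loss = F.mse_loss(sr_img, hr_img)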
def main():
    train_dataset = CustomDataset(root=config.root_train, annFile=config.annFile_train,
                                  transforms=config.transforms, catagory=config.CATEGORY_FILTER)
    val_dataset = CustomDataset(root=config.root_train, annFile=config.annFile_train,
                                transforms=config.transforms, catagory=config.CATEGORY_FILTER)
    train_loader = DataLoader(dataset=train_dataset, batch_size=16, num_workers=4,
                              pin_memory=True, shuffle=False, drop_last=True)
    val_loader = DataLoader(dataset=val_dataset, batch_size=16, num_workers=4,
                            pin_memory=True, shuffle=False, drop_last=True)

    for index, (x, y) in enumerate(train_loader):
        x = draw_y_on_x(x, y)
        grid = torchvision.utils.make_grid(x, nrow=4)

        # Save batch grid as image
        image_dir = "./batch_dir"
        if not os.path.exists(image_dir):
            os.makedirs(image_dir)
        img_name = image_dir + "/batch_" + str(index) + ".png"
        save_image(grid.float() / 255, img_name)

        print(index)
        print(x.shape)
        print(y.shape)

    for index, (x, y) in enumerate(val_loader):
        x = draw_y_on_x(x, y)
        grid = torchvision.utils.make_grid(x, nrow=4)

        # Save batch grid as image
        image_dir = "./batch_dir"
        if not os.path.exists(image_dir):
            os.makedirs(image_dir)
        img_name = image_dir + "/batch_" + str(index) + ".png"
        save_image(grid.float() / 255, img_name)

        print(index)
        print(x.shape)
        print(y.shape)
def main():
    # Data loading
    # train_dataset = CustomDataset(root=config.root_train, annFile=config.annFile_train,
    #                               transforms=config.train_transforms, catagory=config.CATEGORY_FILTER)
    val_dataset = CustomDataset(root=config.root_val, annFile=config.annFile_val,
                                transforms=config.val_transforms, catagory=config.CATEGORY_FILTER)
    # train_loader = DataLoader(dataset=train_dataset, batch_size=16, num_workers=2,
    #                           pin_memory=True, shuffle=True, drop_last=True)
    val_loader = DataLoader(dataset=val_dataset, batch_size=16, num_workers=2,
                            pin_memory=True, shuffle=False, drop_last=True)

    # Model
    model = YoloV3(num_classes=config.C).to(device=config.DEVICE)
    optimizer = optim.Adam(model.parameters(), lr=config.LEARNING_RATE,
                           weight_decay=config.WEIGHT_DECAY)

    # Miscellaneous
    scaled_anchors = (torch.tensor(config.anchors) *
                      torch.tensor(config.Scale).unsqueeze(1).unsqueeze(1).repeat(1, 3, 2)).to(config.DEVICE)

    # Loading previously saved model weights
    load_checkpoint("res50_35k.pth.tar", model, optimizer, config.LEARNING_RATE)

    # Rendering loop
    model.eval()
    for cycle, (x, y) in enumerate(val_loader):
        with torch.no_grad():
            x_gpu = x.to(config.DEVICE)
            yp = model(x_gpu)
            yp = [yp[0].to('cpu'), yp[1].to('cpu'), yp[2].to('cpu')]

        x = denormalize(x) * 255
        draw_y_on_x(x, y)
        draw_yp_on_x(x, yp, probability_threshold=0.5, anchors=config.anchors)

        # Save batch grid as image
        image_dir = "./batch_dir"
        if not os.path.exists(image_dir):
            os.makedirs(image_dir)
        img_name = image_dir + "/batch_" + str(cycle) + ".png"
        save_image(x / 255, img_name)
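# NOTE: denormalize() is defined elsewhere; a hedged sketch that inverts the usual
# ImageNet normalization (the exact mean/std used by config.val_transforms is an assumption):
import torch

def denormalize_sketch(x, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
    mean = torch.tensor(mean).view(1, 3, 1, 1)
    std = torch.tensor(std).view(1, 3, 1, 1)
    return x * std + mean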
def __init__(self, args):
    self.args = args

    # data
    dataset = CustomDataset(args)
    self.collate_fn = dataset.collate_fn  # For zero-padding

    # For K-fold
    train_size = int(len(dataset) / args.cv_num)
    # Randomly split the dataset into non-overlapping new datasets of given lengths;
    # the second argument is a list holding the size of each split.
    self.dataset_list = random_split(
        dataset,
        [train_size for i in range(args.cv_num - 1)] +
        [len(dataset) - (args.cv_num - 1) * train_size])

    # arguments, loss
    self.vocab_size = len(dataset.vocab)
    self.pad_idx = dataset.vocab.word2idx['<pad>']
    self.embeddings = dataset.pretrained_embeddings
    self.criterion = nn.BCEWithLogitsLoss().to(device)

    # make the checkpoint directory if it does not exist
    if not osp.isdir(args.ck_path):
        os.makedirs(args.ck_path, exist_ok=True)  # If exist_ok is False, FileExistsError is raised.
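# NOTE: dataset.collate_fn is only referenced above; a minimal zero-padding sketch,
# assuming each sample is (list_of_token_ids, label):
import torch
from torch.nn.utils.rnn import pad_sequence

def collate_fn_sketch(batch, pad_idx=0):
    seqs, labels = zip(*batch)
    seqs = [torch.as_tensor(s, dtype=torch.long) for s in seqs]
    padded = pad_sequence(seqs, batch_first=True, padding_value=pad_idx)  # (B, max_len)
    return padded, torch.as_tensor(labels, dtype=torch.float)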
def main_worker(gpu, args): torch.cuda.set_device(gpu) device = torch.device('cuda') train_transforms = transforms.Compose([ transforms.RandomResizedCrop(args.image_size), transforms.RandomHorizontalFlip(), transforms.ToTensor(), transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) ]) validation_transforms = transforms.Compose([ transforms.ToTensor(), transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) ]) train_dataset = CustomDataset(root=args.data, split='train', transform=train_transforms) validation_dataset = CustomDataset(root=args.data, split='val', transform=validation_transforms) train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True, num_workers=args.workers) validation_dataloader = torch.utils.data.DataLoader( validation_dataset, batch_size=args.batch_size, num_workers=args.workers) # load pre-trained model from checkpoint model = ft_model(args.pretrained_algo, args.model_name, args.pretrained_dir_file, args.finetuning, args.num_classes) model.train() model.to(device) criterion = nn.CrossEntropyLoss() criterion.to(device) # optimizer = torch.optim.Adam(model.parameters(), lr=0.001) optimizer = torch.optim.SGD(model.parameters(), lr=args.learning_rate, momentum=0.9) args.checkpoint_dir.mkdir(parents=True, exist_ok=True) stats_file = open(args.checkpoint_dir / '{}_stats.txt'.format(args.checkpoint_file[:-4]), 'a', buffering=1) print(' '.join(sys.argv)) print(' '.join(sys.argv), file=stats_file) best_validation_accuracy = 0 since = time.time() for i in range(args.epochs): total_train_loss = 0.0 total_train_correct = 0.0 total_validation_loss = 0.0 total_validation_correct = 0.0 model.train() for batch in train_dataloader: loss, correct = get_loss_and_correct(model, batch, criterion, device) optimizer.zero_grad() loss.backward() optimizer.step() total_train_loss += loss.item() total_train_correct += correct.item() with torch.no_grad(): for batch in validation_dataloader: loss, correct = get_loss_and_correct(model, batch, criterion, device) total_validation_loss += loss.item() total_validation_correct += correct.item() mean_train_loss = total_train_loss / len(train_dataset) train_accuracy = total_train_correct / len(train_dataset) mean_validation_loss = total_validation_loss / len(validation_dataset) validation_accuracy = total_validation_correct / len( validation_dataset) # save the best model if validation_accuracy > best_validation_accuracy: best_validation_accuracy = validation_accuracy torch.save(model.state_dict(), args.checkpoint_dir / args.checkpoint_file) time_elapsed = time.time() - since print( 'Epoch: {}, Train Loss: {:.4f}, Val Loss: {:.4f}, Train Acc: {:.4f}, Val Acc: {:.4f}, Time: {}' .format(i, mean_train_loss, mean_validation_loss, train_accuracy, validation_accuracy, time_elapsed)) print( 'Epoch: {}, Train Loss: {:.4f}, Val Loss: {:.4f}, Train Acc: {:.4f}, Val Acc: {:.4f}, Time: {}' .format(i, mean_train_loss, mean_validation_loss, train_accuracy, validation_accuracy, time_elapsed), file=stats_file)
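# NOTE: get_loss_and_correct() is imported from elsewhere; a hedged sketch, assuming each
# batch is (images, labels) and the criterion is the CrossEntropyLoss created above:
def get_loss_and_correct_sketch(model, batch, criterion, device):
    images, labels = batch[0].to(device), batch[1].to(device)
    logits = model(images)
    loss = criterion(logits, labels)
    correct = (logits.argmax(dim=1) == labels).sum()
    return loss, correct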
def train(img_dir, classes_csv, model_fname=None, resnet_depth=50, epochs=1000, steps=100,
          train_split=0.8, out_dir='', out_prefix=''):
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    # Create the data loaders
    # Get all image fnames in folder
    img_list = []
    if not isinstance(img_dir, list):
        img_dir = [img_dir]
    for dir in img_dir:
        for file in os.listdir(dir):
            if file.endswith(".png"):
                img_list.append(dir + file)

    randomised_list = random.sample(img_list, len(img_list))
    num_train = int(train_split * len(img_list))  # use the train_split argument (was hard-coded to 0.8)
    train_imgs, val_imgs = randomised_list[:num_train], randomised_list[num_train:]

    dataset_train = CustomDataset(img_list=train_imgs, class_list=classes_csv,
                                  transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))
    dataset_val = CustomDataset(img_list=val_imgs, class_list=classes_csv,
                                transform=transforms.Compose([Normalizer(), Resizer()]))

    sampler = AspectRatioBasedSampler(dataset_train, batch_size=2, drop_last=False)
    dataloader_train = DataLoader(dataset_train, num_workers=3, collate_fn=collater, batch_sampler=sampler)
    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
        dataloader_val = DataLoader(dataset_val, num_workers=3, collate_fn=collater, batch_sampler=sampler_val)

    # Create the model
    if resnet_depth == 18:
        retinanet = model.resnet18(num_classes=dataset_train.num_classes(), pretrained=True)
    elif resnet_depth == 34:
        retinanet = model.resnet34(num_classes=dataset_train.num_classes(), pretrained=True)
    elif resnet_depth == 50:
        retinanet = model.resnet50(num_classes=dataset_train.num_classes(), pretrained=True)
    elif resnet_depth == 101:
        retinanet = model.resnet101(num_classes=dataset_train.num_classes(), pretrained=True)
    elif resnet_depth == 152:
        retinanet = model.resnet152(num_classes=dataset_train.num_classes(), pretrained=True)
    else:
        raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    # retinanet = torch.load(model_fname)
    if use_gpu:
        retinanet = retinanet.cuda()
    retinanet = torch.nn.DataParallel(retinanet).cuda()
    if model_fname is not None:
        retinanet.load_state_dict(torch.load(model_fname))

    retinanet.training = True
    optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True)
    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    retinanet.module.freeze_bn()

    start_time = time.perf_counter()  # time.clock() was removed in Python 3.8
    print('Num training images: {}'.format(len(dataset_train)))

    for epoch_num in range(epochs):
        retinanet.train()
        retinanet.module.freeze_bn()
        epoch_loss = []

        for iter_num, data in enumerate(dataloader_train):
            try:
                optimizer.zero_grad()
                classification_loss, regression_loss = retinanet([data['img'].cuda().float(), data['annot']])
                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()
                loss = classification_loss + regression_loss
                if bool(loss == 0):
                    continue
                loss.backward()
                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)
                optimizer.step()
                loss_hist.append(float(loss))
                epoch_loss.append(float(loss))
                # print('Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'.format(
                #     epoch_num, iter_num, float(classification_loss), float(regression_loss), np.mean(loss_hist)))
                del classification_loss
                del regression_loss
            except Exception as e:
                print(e)
                continue

        print('Epoch: {} | Running loss: {:1.5f} | Elapsed Time: {}'.format(
            epoch_num, np.mean(loss_hist), (time.perf_counter() - start_time) / 60))
        mAP = csv_eval.evaluate(dataset_val, retinanet)
        scheduler.step(np.mean(epoch_loss))

        if epoch_num % steps == 0:
            torch.save(retinanet.module, '{}{}_model_{}.pt'.format(out_dir, out_prefix, epoch_num))
            torch.save(retinanet.state_dict(), '{}{}_state_{}.pt'.format(out_dir, out_prefix, epoch_num))

    torch.save(retinanet, out_dir + '{}model_final.pt'.format(out_prefix))
    torch.save(retinanet.state_dict(), out_dir + '{}state_final_.pt'.format(out_prefix))
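# NOTE: the loop saves both the wrapped module and its state_dict; a hedged reload sketch
# (paths follow the format strings above, epoch 100 used as an example):
#     retinanet = torch.load(out_dir + out_prefix + '_model_100.pt')        # full pickled module
#     # or
#     retinanet.load_state_dict(torch.load(out_dir + out_prefix + '_state_100.pt'))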
parser.add_argument("--train", action='store_true', default=False)
parser.add_argument("--predict", action='store_true', default=False)
parser.add_argument("--zazu_timer", action='store_true', default=False)
parser.add_argument("--checkpoint_path", type=str, default='/root/ZazuML/best_checkpoint.pt')
parser.add_argument("--dataset_path", type=str, default='')
parser.add_argument("--output_path", type=str, default='')
args = parser.parse_args()

with open('configs.json', 'r') as fp:
    configs = json.load(fp)
logger = init_logging(__name__)

from dataloader import CustomDataset
CustomDataset("../data/tiny_coco", 'coco')

zazu = ZaZu(configs['model_name'], configs['home_path'], configs['annotation_type'])
if args.search:
    zazu.search(configs['search_method'], configs['epochs'], configs['max_trials'],
                configs['max_instances_at_once'], configs['augmentation_search'])
if args.train:
    adapter = TrialAdapter(0)
    adapter.load(checkpoint_path=args.checkpoint_path)
    adapter.train()
    print('model checkpoint is saved to: ', adapter.checkpoint_path)
if args.predict:
    predict(pred_on_path=args.dataset_path,
            output_path=args.output_path,
            checkpoint_path=args.checkpoint_path,
aug_transform3 = transforms.Compose([
    transforms.RandomResizedCrop((96, 96), scale=(0.08, 1.0), ratio=(0.75, 1.3333333333333333)),
    rnd_color_jitter3,
    rnd_gray,
    transforms.ToTensor(),
])

train_transform = transforms.Compose([
    transforms.ToTensor(),
])

trainset = CustomDataset(root='/dataset', split="train", transform=train_transform)
augset1 = CustomDataset(root='/dataset', split="train", transform=aug_transform1)
augset2 = CustomDataset(root='/dataset', split="train", transform=aug_transform2)
augset3 = CustomDataset(root='/dataset', split="train", transform=aug_transform3)

trainloader = torch.utils.data.DataLoader(trainset, batch_size=1024, shuffle=True, num_workers=2)
augloader1 = torch.utils.data.DataLoader(augset1,
def main_worker(gpu, args):
    global best_acc1
    args.gpu = gpu
    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))

    # create model
    print("=> creating model '{}'".format(args.arch))
    model = models.__dict__[args.arch](num_classes=800, norm_layer=SubBatchNorm2d)

    # load from pre-trained, before DistributedDataParallel constructor
    if args.pretrained:
        if os.path.isfile(args.pretrained):
            print("=> loading freezed checkpoint '{}'".format(args.pretrained))
            states = torch.load(args.pretrained)
            model.load_state_dict(states)
            args.start_epoch = 0
            print("=> checkpoint loaded '{}'".format(args.pretrained))
        else:
            raise ValueError("=> no freezed checkpoint found at '{}'".format(args.pretrained))

    if args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        model = model.cuda(args.gpu)
    else:
        print("=> ERROR: gpu must be assigned")

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda(args.gpu)

    # optimize only the linear classifier
    optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    cudnn.benchmark = True

    # Data loading code
    traindir = args.data
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    transform_train = transforms.Compose([
        transforms.RandomResizedCrop(96),  # add crop resize
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize
    ])
    transform_eval = transforms.Compose([
        transforms.Resize(128),     # add resize
        transforms.CenterCrop(96),  # add crop
        transforms.ToTensor(),
        normalize
    ])

    train_dataset = CustomDataset(traindir, 'train', transform_train)
    eval_dataset = CustomDataset(traindir, 'val', transform_eval)

    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=args.batch_size,
                                               shuffle=True, num_workers=args.workers,
                                               pin_memory=True)
    eval_loader = torch.utils.data.DataLoader(eval_dataset, batch_size=args.batch_size,
                                              shuffle=True, num_workers=args.workers,
                                              pin_memory=True)

    # training code
    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, args)

        if epoch == 0 or (epoch + 1) % args.eval_per_n_epoch == 0:
            accuracy = evaluate(eval_loader, model, args)
            print(f"=> Epoch: {epoch+1}, accuracy: {accuracy:.4f}")

            # remember best acc and save checkpoint
            is_best = accuracy > best_acc1
            best_acc1 = max(accuracy, best_acc1)
            print(f"=> Epoch: {epoch+1}, isBest? : {is_best}")

            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'accuracy': accuracy,
                    'state_dict': model.state_dict(),
                    'optimizer': optimizer.state_dict(),
                },
                is_best,
                save_dir=args.checkpoint_dir,
                epoch=(epoch + 1),
                filename=os.path.join(args.checkpoint_dir,
                                      'checkpoint_{:03d}.pth.tar'.format(epoch + 1)))
def main_worker(gpu, ngpus_per_node, args): args.gpu = gpu if args.gpu is not None: print("Use GPU: {} for training".format(args.gpu)) if args.multiprocessing_distributed and args.gpu != 0: def print_pass(*args): pass builtins.print = print_pass if args.dist_url == "env://" and args.rank == -1: args.rank = int(os.environ["RANK"]) if args.multiprocessing_distributed: # For multiprocessing distributed training, rank needs to be the # global rank among all the processes args.rank = args.rank * ngpus_per_node + gpu dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url, world_size=args.world_size, rank=args.rank) # create model print("=> creating model '{}'".format(args.arch)) if args.arch == 'resnet50': model = Model(resnet50,args,width=1) elif args.arch == 'resnet50x2': model = Model(resnet50,args,width=2) elif args.arch == 'resnet50x4': model = Model(resnet50,args,width=4) else: raise NotImplementedError('model not supported {}'.format(args.arch)) if args.gpu is not None: torch.cuda.set_device(args.gpu) model.cuda(args.gpu) # When using a single GPU per process and per # DistributedDataParallel, we need to divide the batch size # ourselves based on the total number of GPUs we have args.batch_size = int(args.batch_size / ngpus_per_node) args.batch_size_u = int(args.batch_size_u / ngpus_per_node) args.workers = int((args.workers + ngpus_per_node - 1) / ngpus_per_node) model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu]) #find_unused_parameters=True else: model.cuda() # DistributedDataParallel will divide and allocate batch_size to all # available GPUs if device_ids are not set model = torch.nn.parallel.DistributedDataParallel(model) # define loss function (criterion) and optimizer criteria_x = nn.CrossEntropyLoss().cuda(args.gpu) optimizer = torch.optim.SGD(model.parameters(), args.lr, momentum=args.momentum, weight_decay=args.weight_decay, nesterov=True ) # optionally resume from a checkpoint if args.resume: if os.path.isfile(args.resume): print("=> loading checkpoint '{}'".format(args.resume)) if args.gpu is None: checkpoint = torch.load(args.resume) else: # Map model to be loaded to specified single gpu. 
loc = 'cuda:{}'.format(args.gpu) checkpoint = torch.load(args.resume, map_location=loc) args.start_epoch = checkpoint['epoch'] model.load_state_dict(checkpoint['state_dict']) optimizer.load_state_dict(checkpoint['optimizer']) print("=> loaded checkpoint '{}' (epoch {})" .format(args.resume, checkpoint['epoch'])) else: print("=> no checkpoint found at '{}'".format(args.resume)) cudnn.benchmark = True print("=> preparing dataset") # Data loading code transform_strong = transforms.Compose([ transforms.RandomResizedCrop(96, scale=(0.2, 1.)), transforms.RandomApply([ transforms.ColorJitter(0.4, 0.4, 0.4, 0.1) ], p=0.8), transforms.RandomGrayscale(p=0.2), transforms.RandomHorizontalFlip(), transforms.ToTensor(), transforms.Normalize(mean=[0.485, 0.456, 0.406],std=[0.229, 0.224, 0.225]) ]) transform_weak = transforms.Compose([ transforms.RandomResizedCrop(96, scale=(0.2, 1.)), transforms.RandomHorizontalFlip(), transforms.ToTensor(), transforms.Normalize(mean=[0.485, 0.456, 0.406],std=[0.229, 0.224, 0.225]) ]) transform_eval = transforms.Compose([ transforms.ToTensor(), transforms.Normalize(mean=[0.485, 0.456, 0.406],std=[0.229, 0.224, 0.225]) ]) three_crops_transform = loader.ThreeCropsTransform(transform_weak, transform_strong, transform_strong) unlabeled_dataset = CustomDataset(args.data, 'unlabeled', transform=three_crops_transform) labeled_dataset = CustomDataset(args.data, 'train', transform=transform_weak) #labeled_sampler = torch.utils.data.distributed.DistributedSampler(labeled_dataset) labeled_loader = torch.utils.data.DataLoader( labeled_dataset, batch_size=args.batch_size, shuffle=True, num_workers=args.workers, pin_memory=True) #unlabeled_sampler = torch.utils.data.distributed.DistributedSampler(unlabeled_dataset) unlabeled_loader = torch.utils.data.DataLoader( unlabeled_dataset, batch_size=int(args.batch_size_u), shuffle=True, num_workers=args.workers, pin_memory=True) val_loader = torch.utils.data.DataLoader(CustomDataset('/content/dataset', 'val', transform_eval), batch_size=64, shuffle=False, num_workers=args.workers, pin_memory=True) # create loggers if args.gpu==0: tb_logger = tensorboard_logger.Logger(logdir=os.path.join(args.exp_dir,'tensorboard'), flush_secs=2) logger = setup_default_logging(args) logger.info(dict(args._get_kwargs())) else: tb_logger = None logger = None for epoch in range(args.start_epoch, args.epochs): if epoch==0: args.m = 0.99 # larger update in first epoch else: args.m = args.moco_m # args.lr=0.01 adjust_learning_rate(optimizer, epoch, args) train(labeled_loader, unlabeled_loader, model, criteria_x, optimizer, epoch, args, logger, tb_logger) # evaluate on validation set acc1 = validate(val_loader, model, args, logger, tb_logger, epoch) if not args.multiprocessing_distributed or (args.multiprocessing_distributed and args.rank % ngpus_per_node == 0): save_checkpoint({ 'args': args, 'epoch': epoch + 1, 'arch': args.arch, 'state_dict': model.state_dict(), 'optimizer' : optimizer.state_dict() },filename='{}/checkpoint_{:04d}.pt'.format(args.exp_dir,epoch)) # evaluate ema model acc1 = validate(val_loader, model, args, logger, tb_logger, -1)
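# NOTE: loader.ThreeCropsTransform is referenced above; a minimal sketch, assuming it
# returns one weak and two strong views of the same image:
class ThreeCropsTransformSketch:
    def __init__(self, weak, strong1, strong2):
        self.weak, self.strong1, self.strong2 = weak, strong1, strong2

    def __call__(self, x):
        return [self.weak(x), self.strong1(x), self.strong2(x)]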
def main(): #TODO: Get args # python3 train_fixmatch.py --checkpoint-path ./checkpoint_path/model.pth --batch-size 1 --num-epochs 1 --num-steps 1 --train-from-start 1 --dataset-folder ./dataset parser = argparse.ArgumentParser() parser.add_argument('--checkpoint-path', type=str, default= "./checkpoints/model_barlow_20h.pth.tar") parser.add_argument('--batch-size', type=int, default= 512) parser.add_argument('--num-epochs', type=int, default= 10) parser.add_argument('--num-steps', type=int, default= 1) parser.add_argument('--train-from-start', type= int, default= 0) parser.add_argument('--dataset-folder', type= str, default= "./dataset") parser.add_argument('--learning-rate', type = float, default= 0.01) parser.add_argument('--threshold', type = float, default= 0.5) parser.add_argument('--mu', type= int, default= 7) parser.add_argument('--lambd', type= float, default= 0.005) parser.add_argument('--momentum', type= float, default= 0.9) parser.add_argument('--weight-decay', type= float, default= 1.5*1e-6) parser.add_argument('--warmup-epochs', type= int, default= 2) parser.add_argument('--scale-loss', type = float, default= 1.0/32.0) parser.add_argument('--wide', type= int, default= 0) args = parser.parse_args() dataset_folder = args.dataset_folder batch_size = args.batch_size n_epochs = args.num_epochs num_classes = 800 lambd = args.lambd weight_decay = args.weight_decay checkpoint_path = args.checkpoint_path train_from_start = args.train_from_start if torch.cuda.is_available(): device = torch.device("cuda") else: device = torch.device("cpu") unlabeled_train_dataset = CustomDataset(root= dataset_folder, split = "unlabeled", transform = TransformBarlowTwins()) unlabeled_train_loader = DataLoader(unlabeled_train_dataset, batch_size= batch_size, shuffle= True, num_workers= 4) if args.wide == 1: model = lightly.models.BarlowTwins(wide_resnet50_2(pretrained= False), num_ftrs= 2048) else: model = lightly.models.BarlowTwins(resnet18(pretrained= False), num_ftrs= 512) optimizer = LARS(model.parameters(), lr=0, weight_decay=weight_decay, weight_decay_filter=exclude_bias_and_norm, lars_adaptation_filter=exclude_bias_and_norm) criterion = lightly.loss.BarlowTwinsLoss() start_epoch = 0 model.train() losses = Average() model = model.to(device) criterion = criterion.to(device) if train_from_start == 0: assert os.path.isfile(checkpoint_path), "Error: no checkpoint directory found!" 
print("Restoring model from checkpoint") # args.out = os.path.dirname(args.resume) checkpoint = torch.load(checkpoint_path, map_location= device) if args.wide == 0: model = torch.nn.DataParallel(model) # best_acc = checkpoint['best_acc'] start_epoch = checkpoint['epoch'] - 1 model.load_state_dict(checkpoint['state_dict']) optimizer.load_state_dict(checkpoint['optimizer']) if torch.cuda.device_count() > 1: print("Let's use", torch.cuda.device_count(), "GPUs!") model = torch.nn.DataParallel(model) criterion = torch.nn.DataParallel(criterion) model = model.to(device) criterion = criterion.to(device) #TODO # scaler = torch.cuda.amp.GradScaler() # model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model) for epoch in tqdm(range(start_epoch, n_epochs)): # for batch_idx in tqdm(range(n_steps)): ## CHECK for batch_idx, batch in enumerate(tqdm(unlabeled_train_loader)): y_a = batch[0][0].to(device) y_b = batch[0][1].to(device) z_a, z_b = model(y_a, y_b) loss = criterion(z_a, z_b).mean() lr = adjust_learning_rate(args, optimizer, unlabeled_train_loader, epoch * len(unlabeled_train_loader) + batch_idx) optimizer.zero_grad() # scaler.scale(loss).backward() # scaler.step(optimizer) # scaler.update() loss.backward() optimizer.step() losses.update(loss.item()) if batch_idx % 25 == 0: print(f"Epoch number: {epoch}, loss_avg: {losses.avg}, loss: {loss.item()}, lr: {lr}", flush= True) if torch.cuda.device_count() > 1: save_checkpoint({ 'epoch': epoch + 1, 'state_dict': model.module.state_dict(), 'optimizer': optimizer.state_dict() }, checkpoint_path) else: save_checkpoint({ 'epoch': epoch + 1, 'state_dict': model.state_dict(), 'optimizer': optimizer.state_dict() }, checkpoint_path)
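# NOTE: Average() is used above as a running-average loss meter; a minimal sketch of such
# a helper (the real implementation lives elsewhere in this repo):
class AverageSketch:
    def __init__(self):
        self.sum, self.count, self.avg = 0.0, 0, 0.0

    def update(self, value, n=1):
        self.sum += value * n
        self.count += n
        self.avg = self.sum / self.count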
import argparse

import torch
import torch.nn as nn
from torchvision import datasets, transforms, models

from dataloader import CustomDataset
from submission import get_model

# parser part
parser = argparse.ArgumentParser()
parser.add_argument('--checkpoint-dir', type=str)
args = parser.parse_args()

# train composition operations
train_transform = transforms.Compose([
    transforms.ToTensor(),
])

# trainset and trainloaders
trainset = CustomDataset(root='/dataset', split="train", transform=train_transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=256, shuffle=True, num_workers=2)

# net = get_model().cuda()
net = get_model()
criterion = nn.CrossEntropyLoss()
# optimizer = torch.optim.Adam(net.parameters(), lr=0.05)

print('Start Training')

# training part
def main_worker(gpu, args):
    # args.rank += gpu
    #
    # torch.distributed.init_process_group(
    #     backend='nccl', init_method=args.dist_url,
    #     world_size=args.world_size, rank=args.rank)
    #
    # if args.rank == 0:
    args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
    stats_file = open(args.checkpoint_dir / 'stats_{}.txt'.format(args.resnet_layers), 'a', buffering=1)
    print(' '.join(sys.argv))
    print(' '.join(sys.argv), file=stats_file)

    torch.cuda.set_device(gpu)
    # torch.backends.cudnn.benchmark = True

    model = BarlowTwins(args).cuda(gpu)
    # model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
    # model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu])
    optimizer = LARS(model.parameters(), lr=0, weight_decay=args.weight_decay,
                     weight_decay_filter=exclude_bias_and_norm,
                     lars_adaptation_filter=exclude_bias_and_norm)

    # automatically resume from checkpoint if it exists
    if (args.checkpoint_dir / 'checkpoint_{}.pth'.format(args.resnet_layers)).is_file():
        ckpt = torch.load(args.checkpoint_dir / 'checkpoint_{}.pth'.format(args.resnet_layers),
                          map_location='cpu')
        start_epoch = ckpt['epoch']
        model.load_state_dict(ckpt['model'])
        optimizer.load_state_dict(ckpt['optimizer'])
    else:
        start_epoch = 0

    # dataset = torchvision.datasets.ImageFolder(args.data / 'train', Transform())
    # sampler = torch.utils.data.distributed.DistributedSampler(dataset)
    # assert args.batch_size % args.world_size == 0
    # per_device_batch_size = args.batch_size // args.world_size
    # loader = torch.utils.data.DataLoader(
    #     dataset, batch_size=per_device_batch_size, num_workers=args.workers,
    #     pin_memory=True, sampler=sampler)
    dataset = CustomDataset(root=args.data, split='unlabeled', transform=Transform(args.image_size))
    loader = torch.utils.data.DataLoader(dataset, batch_size=args.batch_size, num_workers=args.workers)

    start_time = time.time()
    scaler = torch.cuda.amp.GradScaler()
    for epoch in range(start_epoch, args.epochs):
        # sampler.set_epoch(epoch)
        for step, ((y1, y2), _) in enumerate(loader, start=epoch * len(loader)):
            y1 = y1.cuda(gpu, non_blocking=True)
            y2 = y2.cuda(gpu, non_blocking=True)
            lr = adjust_learning_rate(args, optimizer, loader, step)
            optimizer.zero_grad()
            with torch.cuda.amp.autocast():
                loss = model.forward(y1, y2)
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
            if step % args.print_freq == 0:
                # torch.distributed.reduce(loss.div_(args.world_size), 0)
                # if args.rank == 0:
                stats = dict(epoch=epoch, step=step, learning_rate=lr,
                             loss=loss.item(), time=int(time.time() - start_time))
                print(json.dumps(stats))
                print(json.dumps(stats), file=stats_file)

        # if args.rank == 0:
        # save checkpoint
        state = dict(epoch=epoch + 1, model=model.state_dict(), optimizer=optimizer.state_dict())
        torch.save(state, args.checkpoint_dir / 'checkpoint_{}.pth'.format(args.resnet_layers))

    # if args.rank == 0:
    # save final model
    torch.save(model.backbone.state_dict(),
               args.checkpoint_dir / 'resnet{}.pth'.format(args.resnet_layers))
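# NOTE: adjust_learning_rate() is defined elsewhere; a hedged sketch assuming linear warmup
# followed by cosine decay (args.learning_rate and args.warmup_epochs are assumptions):
import math

def adjust_learning_rate_sketch(args, optimizer, loader, step):
    max_steps = args.epochs * len(loader)
    warmup_steps = args.warmup_epochs * len(loader)
    if step < warmup_steps:
        lr = args.learning_rate * step / max(1, warmup_steps)
    else:
        progress = (step - warmup_steps) / max(1, max_steps - warmup_steps)
        lr = args.learning_rate * 0.5 * (1.0 + math.cos(math.pi * progress))
    for group in optimizer.param_groups:
        group['lr'] = lr
    return lr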
from torch.utils.tensorboard import SummaryWriter
from torch.utils.data.sampler import SubsetRandomSampler
from torch.utils.data import DataLoader
from torch.optim.rmsprop import RMSprop
import torch.nn as nn
from tqdm import tqdm
import torch
import numpy as np
import sys

configs = Configs()
device = 'cuda' if torch.cuda.is_available() else 'cpu'
DATASET_PATH = r"D:\Code\CV_project\lstm_fast_fcn\dataset.pt"

dataset = CustomDataset(configs)
dataset_size = len(dataset)
indices = list(range(dataset_size))
np.random.seed(0)
np.random.shuffle(indices)
split = int(np.floor(configs.valSplit * dataset_size))
trainIndices, valIndices = indices[split:], indices[:split]

trainLoader = DataLoader(dataset,
                         batch_size=configs.batchSize,
                         num_workers=0,
                         sampler=SubsetRandomSampler(trainIndices))
valLoader = DataLoader(dataset,
                       batch_size=configs.batchSize,
                       num_workers=0,
def main(): parser = argparse.ArgumentParser() parser.add_argument('--checkpoint-path', type=str, default="./checkpoints/model_transfer.pth.tar") parser.add_argument('--transfer-path', type=str, default="./checkpoints/model_barlow.pth.tar") parser.add_argument('--best-path', type=str, default="./checkpoints/model_barlow_best.pth.tar") parser.add_argument('--batch-size', type=int, default=10) parser.add_argument('--num-epochs', type=int, default=100) parser.add_argument('--dataset-folder', type=str, default="./dataset") parser.add_argument('--new-dataset-folder', type=str, default="./dataset") parser.add_argument('--learning-rate-classifier', type=float, default=0.001) parser.add_argument('--learning-rate-model', type=float, default=0.001) parser.add_argument('--momentum', type=float, default=0.9) parser.add_argument('--weight-decay', type=float, default=0.001) parser.add_argument('--fine-tune', type=int, default=0) parser.add_argument('--wide', type=int, default=0) parser.add_argument('--model-name', type=str, default="moco") parser.add_argument('--dropout', type=float, default=0) parser.add_argument('--new-data', type=int, default=0) parser.add_argument('--seed', type=int, default=0) args = parser.parse_args() dataset_folder = args.dataset_folder batch_size = args.batch_size batch_size_val = 256 #5120 n_epochs = args.num_epochs weight_decay = args.weight_decay checkpoint_path = args.checkpoint_path random.seed(args.seed) np.random.seed(args.seed) torch.manual_seed(args.seed) if torch.cuda.is_available(): torch.cuda.manual_seed(args.seed) torch.backends.cudnn.deterministic = True print(f"Training with seed {args.seed}") if torch.cuda.is_available(): device = torch.device("cuda") else: device = torch.device("cpu") train_transform, val_transform = get_transforms( ) #TODO Get new transforms file if args.new_data == 0: labeled_train_dataset = CustomDataset(root=args.dataset_folder, split="train", transform=train_transform) else: labeled_train_dataset = CustomDataset(root=args.new_dataset_folder, split="train_new", transform=train_transform) val_dataset = CustomDataset(root=args.dataset_folder, split="val", transform=val_transform) labeled_train_loader = DataLoader(labeled_train_dataset, batch_size=batch_size, shuffle=True, num_workers=4) val_loader = DataLoader(val_dataset, batch_size=batch_size_val, shuffle=False, num_workers=4) resnet = lightly.models.ResNetGenerator('resnet-18', 1, num_splits=0) backbone = torch.nn.Sequential( *list(resnet.children())[:-1], torch.nn.AdaptiveAvgPool2d(1), ) if args.model_name == "moco": model = lightly.models.MoCo(backbone, num_ftrs=512, m=0.99, batch_shuffle=True) else: if args.wide == 1: model = lightly.models.BarlowTwins( wide_resnet50_2(pretrained=False), num_ftrs=2048) else: model = lightly.models.BarlowTwins(resnet18(pretrained=False), num_ftrs=512) checkpoint = torch.load(args.transfer_path, map_location=device) # print(checkpoint['state_dict'].keys()) # print("printed keys") # print(model_barlow.state_dict().keys()) # print("printed model keys") # if args.wide == 0: # model = torch.nn.DataParallel(model) model.load_state_dict(checkpoint['state_dict']) # print(model_barlow) if args.wide == 0: model = model.backbone else: model = model.backbone if args.wide == 1: classifier = Classifier(ip=2048, dp=args.dropout) else: classifier = Classifier(ip=512, dp=args.dropout) if torch.cuda.device_count() > 1: print("Let's use", torch.cuda.device_count(), "GPUs!") model = torch.nn.DataParallel(model) classifier = torch.nn.DataParallel(classifier) if not 
args.fine_tune: model.requires_grad_(False) model = model.to(device) classifier = classifier.to(device) param_groups = [ dict(params=classifier.parameters(), lr=args.learning_rate_classifier) ] if args.fine_tune: param_groups.append( dict(params=model.parameters(), lr=args.learning_rate_model)) optimizer = optim.Adam(param_groups, weight_decay=weight_decay) scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, n_epochs) start_epoch = 0 losses = Average() criterion = torch.nn.CrossEntropyLoss().to(device) best_val_accuracy = 25.0 #TODO for epoch in tqdm(range(start_epoch, n_epochs)): if args.fine_tune: model.train() else: model.eval() classifier.train() for batch_idx, batch in enumerate(tqdm(labeled_train_loader)): img = batch[0].to(device) labels = batch[1].to(device) model_out = model(img) if args.model_name == "moco": model_out = model_out.squeeze() model_out = torch.nn.functional.normalize(model_out, dim=1) logits = classifier(model_out) loss = criterion(logits, labels) optimizer.zero_grad() loss.backward() optimizer.step() scheduler.step() losses.update(loss.item()) if batch_idx % 25 == 0: print( f"Epoch number: {epoch}, loss_avg: {losses.avg}, loss: {loss.item()}, best accuracy: {best_val_accuracy:.2f}", flush=True) save_checkpoint( { 'epoch': epoch + 1, 'model_state_dict': model.state_dict(), 'classifier_state_dict': classifier.state_dict(), 'optimizer': optimizer.state_dict(), 'scheduler': scheduler.state_dict() }, checkpoint_path) model.eval() with torch.no_grad(): val_loss = 0 val_size = 0 total = 0 correct = 0 for batch in val_loader: model_out = model(batch[0].to(device)) if args.model_name == "moco": model_out = model_out.squeeze() model_out = torch.nn.functional.normalize(model_out, dim=1) logits_val = classifier(model_out) labels = batch[1].to(device) val_loss += F.cross_entropy(logits_val, labels) _, predicted = torch.max(logits_val.data, 1) total += labels.size(0) correct += (predicted == labels).sum().item() val_size += 1 # break print( f"Val loss: {val_loss/val_size}, Accuracy: {(100 * correct / total):.2f}%", flush=True) if 100 * correct / total > best_val_accuracy: best_val_accuracy = 100 * correct / total best_val_loss = val_loss / val_size print( f"Saving the best model with {best_val_accuracy:.2f}% accuracy and {best_val_loss:.2f} loss", flush=True) save_checkpoint( { 'epoch': epoch + 1, 'model_state_dict': model.state_dict(), 'classifier_state_dict': classifier.state_dict(), 'optimizer': optimizer.state_dict(), 'scheduler': scheduler.state_dict(), 'best_val_accuracy': best_val_accuracy, 'best_val_loss': best_val_loss }, args.best_path)
def main_worker(gpu, ngpus_per_node, args): args.gpu = gpu if args.gpu is not None: print("=> Use GPU: {} for training".format(args.gpu)) print("=> creating model '{}'".format(args.arch)) model = MoCo(models.__dict__[args.arch], args.moco_dim, args.moco_k, args.moco_m, args.moco_t, args.mlp) print(model) if args.gpu is not None: torch.cuda.set_device(args.gpu) model = model.cuda(args.gpu) else: raise NotImplementedError("Only Single GPU is supported.") # define loss function (criterion) and optimizer criterion = nn.CrossEntropyLoss().cuda(args.gpu) optimizer = torch.optim.SGD(model.parameters(), args.lr, momentum=args.momentum, weight_decay=args.weight_decay) # optionally resume from a checkpoint if args.resume: if os.path.isfile(args.resume): print("=> loading checkpoint '{}'".format(args.resume)) if args.gpu is None: checkpoint = torch.load(args.resume) else: # Map model to be loaded to specified single gpu. loc = 'cuda:{}'.format(args.gpu) checkpoint = torch.load(args.resume, map_location=loc) args.start_epoch = checkpoint['epoch'] model.load_state_dict(checkpoint['state_dict']) optimizer.load_state_dict(checkpoint['optimizer']) print("=> loaded checkpoint '{}' (epoch {})".format( args.resume, checkpoint['epoch'])) del checkpoint # release GPU memory else: print("=> no checkpoint found at '{}'".format(args.resume)) cudnn.benchmark = True # Data loading code traindir = args.data normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) if args.aug_plus: # MoCo v2's aug: similar to SimCLR https://arxiv.org/abs/2002.05709 augmentation = [ transforms.RandomResizedCrop( 96, scale=(0.2, 1.)), # Add back RandomResizedCrop transforms.RandomApply( [ transforms.ColorJitter(0.4, 0.4, 0.4, 0.1) # not strengthened ], p=0.8), transforms.RandomGrayscale(p=0.2), transforms.RandomApply([GaussianBlur([.1, 2.])], p=0.5), transforms.RandomHorizontalFlip(), transforms.ToTensor(), normalize ] else: # MoCo v1's aug: the same as InstDisc https://arxiv.org/abs/1805.01978 augmentation = [ transforms.RandomResizedCrop(96, scale=(0.2, 1.)), transforms.RandomGrayscale(p=0.2), transforms.ColorJitter(0.4, 0.4, 0.4, 0.4), transforms.RandomHorizontalFlip(), transforms.ToTensor(), normalize ] train_dataset = CustomDataset( traindir, "unlabeled", TwoCropsTransform(transforms.Compose(augmentation))) if args.small_set: print('=> Using 1/10 unlabeled set') train_sampler = torch.utils.data.RandomSampler(train_dataset, replacement=True, num_samples=51200) train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=args.batch_size, num_workers=args.workers, pin_memory=True, sampler=train_sampler) else: print('=> Using full unlabeled set') train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True, num_workers=args.workers, pin_memory=True, drop_last=True) print("=> Start Training.") for epoch in range(args.start_epoch, args.epochs): adjust_learning_rate(optimizer, epoch, args) # train for one epoch train(train_loader, model, criterion, optimizer, epoch, args) if epoch == 0 or (epoch + 1) % args.save_checkpoint_per_epoch == 0: save_checkpoint( { 'epoch': epoch + 1, 'arch': args.arch, 'state_dict': model.state_dict(), 'optimizer': optimizer.state_dict(), }, is_best=False, filename=os.path.join( args.checkpoint_dir, 'checkpoint_{:03d}.pth'.format(epoch + 1)))
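# NOTE: TwoCropsTransform follows the MoCo reference pattern of returning two augmented
# views (query and key) of the same image; a minimal sketch:
class TwoCropsTransformSketch:
    def __init__(self, base_transform):
        self.base_transform = base_transform

    def __call__(self, x):
        return [self.base_transform(x), self.base_transform(x)]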
def main(): # Data loading train_dataset = CustomDataset(root=config.root_train, annFile=config.annFile_train, transforms=config.train_transforms, catagory=config.CATEGORY_FILTER) val_dataset = CustomDataset(root=config.root_val, annFile=config.annFile_val, transforms=config.val_transforms, catagory=config.CATEGORY_FILTER) train_loader = DataLoader(dataset=train_dataset, batch_size=8, num_workers=2, pin_memory=True, shuffle=True, drop_last=True) val_loader = DataLoader(dataset=val_dataset, batch_size=8, num_workers=2, pin_memory=True, shuffle=False, drop_last=True) # Model model = YoloV3(num_classes=config.C).to(device=config.DEVICE) # from model_external import YOLOv3 # model = YOLOv3(num_classes=90).to(device=config.DEVICE) optimizer = optim.Adam(model.parameters(), lr=config.LEARNING_RATE, weight_decay=config.WEIGHT_DECAY) from loss_external import YoloLoss loss_function = YoloLoss().to(device=config.DEVICE) # Miscellaneous scaled_anchors = (torch.tensor(config.anchors) * torch.tensor( config.Scale).unsqueeze(1).unsqueeze(1).repeat(1, 3, 2)).to( config.DEVICE) writer = SummaryWriter() current_time = time.time() # Loading previously saved model weights if config.LOAD_MODEL: load_checkpoint("cp.pth.tar", model, optimizer, config.LEARNING_RATE) print(torch.cuda.memory_summary(device=None, abbreviated=False)) # Training loop for cycle in range(config.CYCLES): print("Cycle:", cycle) x, y = next(iter(val_loader)) x = x.to(config.DEVICE) y0, y1, y2 = (y[0].to(config.DEVICE), y[1].to(config.DEVICE), y[2].to(config.DEVICE)) yp = model(x) loss_0 = loss_function(predictions=yp[0], target=y0, anchors=scaled_anchors[0]) loss_1 = loss_function(predictions=yp[1], target=y1, anchors=scaled_anchors[1]) loss_2 = loss_function(predictions=yp[2], target=y2, anchors=scaled_anchors[2]) loss = loss_0 + loss_1 + loss_2 loss.backward() optimizer.step() optimizer.zero_grad() # Run validation if cycle % 100 == 0 and cycle != 0: model.eval() losses = [] with torch.no_grad(): x, y = next(iter(val_loader)) x = x.to(config.DEVICE) y0, y1, y2 = (y[0].to(config.DEVICE), y[1].to(config.DEVICE), y[2].to(config.DEVICE)) yp = model(x) loss_0 = loss_function(predictions=yp[0], target=y0, anchors=scaled_anchors[0]) loss_1 = loss_function(predictions=yp[1], target=y1, anchors=scaled_anchors[1]) loss_2 = loss_function(predictions=yp[2], target=y2, anchors=scaled_anchors[2]) loss = loss_0 + loss_1 + loss_2 losses.append(loss) avg_val_loss = sum(losses) / len(losses) writer.add_scalar("val_loss: ", avg_val_loss, cycle) model.train() # Run validation """ if cycle % 100 == 0 and cycle != 0: model.eval() x, y = next(iter(val_loader)) x = x.float() x = x.to(config.DEVICE) # y0, y1, y2 = (y[0].to(config.DEVICE), y[1].to(config.DEVICE), y[2].to(config.DEVICE)) with torch.no_grad(): yp = model(x) # Move predictions to cpu yp = [yp[0].to('cpu'), yp[1].to('cpu'), yp[2].to('cpu')] # boxes_from_yp(yp) returns all yp bboxes in a batch yp_boxes = boxes_from_yp(yp=yp, iou_threshold=config.MAP_IOU_THRESH, threshold=config.CONF_THRESHOLD) # boxes_from_y(y) returns all y bboxes in a batch y_boxes = boxes_from_y(y=y) """ # Save model if cycle % 1000 == 0 and cycle != 0: save_checkpoint(model, optimizer, cycle, filename=config.CHECKPOINT_FILE) # Rendering loop if cycle % 100 == 0 and cycle != 0: model.eval() x, y = next(iter(val_loader)) with torch.no_grad(): x_gpu = x.to(config.DEVICE) yp = model(x_gpu) yp = [yp[0].to('cpu'), yp[1].to('cpu'), yp[2].to('cpu')] x = denormalize(x) * 255 draw_y_on_x(x, y) draw_yp_on_x(x, yp, 
probability_threshold=0.5, anchors=config.anchors) # Save batch grid as image image_dir = "./batch_dir" image_dir_exists = os.path.exists(image_dir) if not image_dir_exists: os.makedirs(image_dir) img_name = str(image_dir) + "/batch_" + str(cycle) + ".png" save_image(x / 255, img_name) model.train() writer.add_scalar("train_loss: ", loss.item(), cycle) delta_time, current_time = time_function(current_time) writer.add_scalar("Epoch Duration [s]", delta_time, cycle) writer.flush()
parser = argparse.ArgumentParser()
parser.add_argument('--checkpoint-dir', type=str, default='checkpoints/')
parser.add_argument('--model-name', type=str, default='simsiam')
parser.add_argument('--epochs', type=int, default=1)
parser.add_argument('--net_size', type=int, default=50)
parser.add_argument('--temperature', type=int, default=1)
args = parser.parse_args()

checkpoint_path = args.checkpoint_dir + args.model_name
# sys.path.insert(1, args.checkpoint_dir)
# PATH = '/Users/colinwan/Desktop/NYU_MSDS/2572/FinalProject/DL21SP20'
PATH = ''

train_dataset = CustomDataset(root=PATH + '/dataset', split='unlabeled',
                              transform=get_aug(train=True, image_size=96))
BATCH_SIZE = 256
print(len(train_dataset))

train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE,
                                               shuffle=True, num_workers=1)

if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

model = SimSiam().to(device)
check = os.path.exists(
    os.path.join(checkpoint_path, args.model_name + "_encoder_{}.pth".format(args.net_size)))
print(os.path.join(checkpoint_path, args.model_name + "_encoder_{}.pth".format(args.net_size)))
from dataloader import CustomDataset
from model.seq2seq import Seq2Seq

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if torch.cuda.is_available():
    with_cuda = True
    device_ids = None
    print("------- GPU Working -------")
    print("[Current GPU]:" + str(torch.cuda.get_device_name(0)))
else:
    with_cuda = False
    device_ids = None
    print("------- CPU Working -------")

data_loaded = CustomDataset(path='data/eng-fra.txt')
pad_idx = data_loaded.vocab_stoi['<pad>']

hidden_size = 1000
vocab_len = len(data_loaded.vocab_stoi)
embedding_size = 620
batch_size = 80

train_loader = torchdata.DataLoader(dataset=data_loaded,
                                    collate_fn=data_loaded.custom_collate_fn,
                                    batch_size=batch_size)
trg_max_seq_len = next(iter(train_loader))[1].size(1) - 1  # <s> is not included
epochs = 1
class fullmodel(nn.Module):
    def __init__(self, num_classes=800):
        super(fullmodel, self).__init__()
        self.pretrain = resnet50()
        self.pretrain.fc = nn.Linear(self.pretrain.fc.in_features, num_classes)
        self.relu = nn.ReLU()
        self.linear = nn.Linear(num_classes, num_classes)

    def forward(self, x):
        x = self.relu(self.pretrain(x))
        outputs = self.linear(x)
        return outputs


unlabeled_dataset = CustomDataset(PATH + '/dataset', 'unlabeled', transform=transform_unlabled)
unlabeled_trainloader = torch.utils.data.DataLoader(unlabeled_dataset,
                                                    batch_size=BATCH_SIZE * mu,
                                                    shuffle=True,
                                                    num_workers=1)

labeled_dataset = CustomDataset(PATH + '/dataset', 'train', transform=transform_labeled)
labeled_trainloader = torch.utils.data.DataLoader(labeled_dataset,
                                                  batch_size=BATCH_SIZE,
                                                  shuffle=True,
                                                  num_workers=1)

validation_dataset = CustomDataset(PATH + '/dataset', 'val',
    transforms.ColorJitter(hue=.1, saturation=.1, contrast=.1),
    transforms.RandomRotation(20, resample=Image.BILINEAR),
    # transforms.GaussianBlur(7, sigma=(0.1, 1.0)),
    transforms.ToTensor(),  # convert PIL to Pytorch Tensor
    normalize,
])

validation_transforms = transforms.Compose([
    transforms.Resize((96, 96)),
    transforms.ToTensor(),
    normalize,
])

# path = '/Users/colinwan/Desktop/NYU_MSDS/2572/FinalProject/DL21SP20'
path = ''
train_dataset = CustomDataset(root=path + '/dataset', split='train', transform=train_transforms)
validation_dataset = CustomDataset(root=path + '/dataset', split='val', transform=validation_transforms)

BATCH_SIZE = 512
train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE,
                                               shuffle=True, num_workers=1)
validation_dataloader = torch.utils.data.DataLoader(validation_dataset, batch_size=BATCH_SIZE,
                                                    num_workers=1)

# from tqdm.notebook import tqdm
import os
import argparse

import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets, transforms, models

from dataloader import CustomDataset
from submission import get_model, eval_transform, team_id, team_name, email_address

parser = argparse.ArgumentParser()
parser.add_argument('--checkpoint-path', type=str)
args = parser.parse_args()

evalset = CustomDataset(root='/dataset', split="val", transform=eval_transform)
evalloader = torch.utils.data.DataLoader(evalset, batch_size=256, shuffle=False, num_workers=2)

net = get_model()
checkpoint = torch.load(args.checkpoint_path)
net.load_state_dict(checkpoint)
net = net.cuda()

net.eval()
correct = 0
total = 0
with torch.no_grad():
    for data in evalloader:
        images, labels = data
        images = images.cuda()
#################################################################
def addIndexToTrainData(trainset):
    returnList = []
    for iC, (x, y) in enumerate(trainset):
        if iC % 1000 == 0:
            print(">>>>>>>>", iC)
        returnList.append((x, y, iC))
    return returnList

# ==========================================================================
# ==========================================================================
from submission import get_model, eval_transform, team_id, team_name, email_address

trainset = CustomDataset(root='./dataset', split="train", transform=train_transform)
# trainset = addIndexToTrainData(trainset)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=256, shuffle=True, num_workers=2)

net = get_model().cuda()
net = torch.nn.DataParallel(net)
net = net.cuda()

# trainLabeledImage(net, trainloader)
# unLabeledSet = CustomDataset(root='./dataset', split="unlabeled", transform=train_transform)
def main():
    # t_img = cv2.imread(f'{root_path}/imgs/AFW_1051618982_1_0.jpg')
    lr = 0.001
    models = [MobileNetV1]
    model_name = ['mobilev1']

    for init_model in models:
        img_size = [(224, 224, 3), (224, 224, 1), (256, 256, 3), (256, 256, 1)]
        m_name = model_name[0]
        print(f'Train Model {m_name}')

        for i_size in img_size:
            print(f'Train Image size {i_size}')
            dataset = CustomDataset(root_path, anno_name, dataset_type, i_size)
            print(f"Train dataset size {len(dataset)}")
            train_loader = DataLoader(dataset, batch_size=8, num_workers=0, shuffle=True)

            model = init_model(i_size[0], i_size[-1], num_landmark)
            model.to(DEVICE)

            criterion = torch.nn.MSELoss().to(DEVICE)
            optimizer = torch.optim.Adam(model.parameters(), lr=lr)
            scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min')

            pre_train = False
            model_path = 'E:/models'
            if pre_train:
                saved_state = torch.load(f'{model_path}/f_landmark-tcdcn-52-0.0004.pth')
                model.load_state_dict(saved_state['model_state_dict'])
                optimizer.load_state_dict(saved_state['optimizer_state_dict'])
                init_epoch = saved_state['Epoch']
                min_loss = saved_state['loss']
            else:
                init_epoch = 0
                min_loss = 1

            epochs = 10000
            print(f'min loss : {min_loss}')
            for epoch in range(init_epoch, epochs):
                print(f'{epoch} epoch start! : {datetime.datetime.now().strftime("%Y.%m.%d %H:%M:%S")}')
                loss = train(train_loader, model, criterion, optimizer, DEVICE, scheduler)
                print(f" Average Loss : {loss:.6f}")

                if min_loss > loss:
                    min_loss = loss
                    state = {
                        'Epoch': epoch,
                        'model_state_dict': model.state_dict(),
                        'optimizer_state_dict': optimizer.state_dict(),
                        'loss': loss
                    }
                    model_path = os.path.join(model_save_path, f'f_landmark-{m_name}-{i_size}.pth')
                    torch.save(state, model_path)
                    print(f'Saved model_{m_name} _ [loss : {loss:.6f}, save_path : {model_path}\n')

                if loss < 0.000001:
                    break
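# NOTE: train() is called above but not shown; a hedged sketch that returns the epoch's
# average MSE loss and steps the ReduceLROnPlateau scheduler on it (batch layout assumed):
def train_sketch(loader, model, criterion, optimizer, device, scheduler):
    model.train()
    total, count = 0.0, 0
    for images, landmarks in loader:               # assumes (image, landmark) batches
        images, landmarks = images.to(device), landmarks.to(device)
        optimizer.zero_grad()
        loss = criterion(model(images), landmarks)
        loss.backward()
        optimizer.step()
        total += loss.item()
        count += 1
    avg_loss = total / max(1, count)
    scheduler.step(avg_loss)
    return avg_loss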
def main():
    # Example invocation:
    # python3 train_fixmatch.py --checkpoint-path ./checkpoint_path/model.pth --batch-size 1 --num-epochs 1 --num-steps 1 --train-from-start 1 --dataset-folder ./dataset
    parser = argparse.ArgumentParser()
    parser.add_argument('--checkpoint-path', type=str, default="./checkpoints/model_fm_transfer.pth.tar")
    parser.add_argument('--transfer-path', type=str, default="./checkpoints/model_transfer.pth.tar")
    parser.add_argument('--best-path', type=str, default="./checkpoints/model_barlow_best.pth.tar")
    parser.add_argument('--batch-size', type=int, default=64)
    parser.add_argument('--num-epochs', type=int, default=10)
    parser.add_argument('--num-steps', type=int, default=10)
    parser.add_argument('--train-from-start', type=int, default=1)
    parser.add_argument('--dataset-folder', type=str, default="./dataset")
    parser.add_argument('--new-dataset-folder', type=str, default="./dataset")
    parser.add_argument('--learning-rate', type=float, default=0.01)
    parser.add_argument('--threshold', type=float, default=0.5)
    parser.add_argument('--mu', type=int, default=7)
    parser.add_argument('--lambd', type=int, default=1)
    parser.add_argument('--momentum', type=float, default=0.9)
    parser.add_argument('--weight-decay', type=float, default=0.001)
    parser.add_argument('--layers', type=int, default=18)
    parser.add_argument('--fine-tune', type=int, default=1)
    parser.add_argument('--new-data', type=int, default=0)
    args = parser.parse_args()

    dataset_folder = args.dataset_folder
    batch_size_labeled = args.batch_size
    mu = args.mu
    batch_size_unlabeled = mu * args.batch_size
    batch_size_val = 256  # 5120
    n_epochs = args.num_epochs
    n_steps = args.num_steps
    num_classes = 800
    threshold = args.threshold
    learning_rate = args.learning_rate
    momentum = args.momentum
    lamd = args.lambd
    tau = 0.95
    weight_decay = args.weight_decay
    checkpoint_path = args.checkpoint_path
    train_from_start = args.train_from_start
    n_layers = args.layers

    device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

    train_transform, val_transform = get_transforms()

    if args.new_data == 0:
        labeled_train_dataset = CustomDataset(root=args.dataset_folder, split="train", transform=train_transform)
    else:
        labeled_train_dataset = CustomDataset(root=args.new_dataset_folder, split="train_new", transform=train_transform)
    unlabeled_train_dataset = CustomDataset(root=dataset_folder, split="unlabeled", transform=TransformFixMatch(mean=0, std=0))
    val_dataset = CustomDataset(root=dataset_folder, split="val", transform=val_transform)

    labeled_train_loader = DataLoader(labeled_train_dataset, batch_size=batch_size_labeled, shuffle=True, num_workers=4)
    unlabeled_train_loader = DataLoader(unlabeled_train_dataset, batch_size=batch_size_unlabeled, shuffle=True, num_workers=4)
    val_loader = DataLoader(val_dataset, batch_size=batch_size_val, shuffle=False, num_workers=4)

    labeled_iter = iter(labeled_train_loader)
    unlabeled_iter = iter(unlabeled_train_loader)

    # Backbone initialised from the transfer checkpoint, plus a linear classifier head.
    model = wide_resnet50_2(pretrained=False, num_classes=800)
    classifier = Classifier(ip=2048, dp=0)
    start_epoch = 0

    checkpoint = torch.load(args.transfer_path, map_location=device)
    model.load_state_dict(checkpoint['model_state_dict'])
    classifier.load_state_dict(checkpoint['classifier_state_dict'])

    param_groups = [dict(params=classifier.parameters(), lr=args.learning_rate)]
    if args.fine_tune:
        param_groups.append(dict(params=model.parameters(), lr=args.learning_rate))
    optimizer = torch.optim.SGD(param_groups, lr=learning_rate, momentum=momentum,
                                nesterov=True, weight_decay=weight_decay)
    scheduler = get_cosine_schedule_with_warmup(optimizer, 0, num_training_steps=n_epochs * n_steps)

    if torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        model = torch.nn.DataParallel(model)
        classifier = torch.nn.DataParallel(classifier)

    if train_from_start == 0:
        assert os.path.isfile(checkpoint_path), "Error: no checkpoint directory found!"
        print("Restoring model from checkpoint")
        checkpoint = torch.load(checkpoint_path)
        start_epoch = checkpoint['epoch'] - 1
        model.load_state_dict(checkpoint['backbone_state_dict'])
        classifier.load_state_dict(checkpoint['classifier_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        scheduler.load_state_dict(checkpoint['scheduler'])

    model = model.to(device)
    classifier = classifier.to(device)
    model.train()

    losses = Average()
    losses_l = Average()
    losses_u = Average()
    mask_probs = Average()
    best_val_accuracy = 25.0

    for epoch in tqdm(range(start_epoch, n_epochs)):
        if args.fine_tune:
            model.train()
            classifier.train()
        else:
            model.eval()
            classifier.train()

        for batch_idx in tqdm(range(n_steps)):
            # Re-create the iterators whenever a loader is exhausted.
            try:
                img_lab, targets_lab = next(labeled_iter)
            except StopIteration:
                labeled_iter = iter(labeled_train_loader)
                img_lab, targets_lab = next(labeled_iter)
            try:
                unlab, _ = next(unlabeled_iter)
                img_weak, img_strong = unlab[0], unlab[1]
            except StopIteration:
                unlabeled_iter = iter(unlabeled_train_loader)
                unlab, _ = next(unlabeled_iter)
                img_weak, img_strong = unlab[0], unlab[1]

            img_lab = img_lab.to(device)
            targets_lab = targets_lab.to(device)
            img_weak = img_weak.to(device)
            img_strong = img_strong.to(device)

            # Forward the labelled, weakly and strongly augmented images in a single pass.
            img_cat = torch.cat((img_lab, img_weak, img_strong), dim=0)
            logits_cat = classifier(model(img_cat))
            logits_lab = logits_cat[:batch_size_labeled]
            logits_unlab = logits_cat[batch_size_labeled:]
            logits_weak, logits_strong = torch.chunk(logits_unlab, chunks=2, dim=0)

            # Pseudo-labels come from the weak view; the mask keeps only confident predictions.
            pseudo_label = torch.softmax(logits_weak.detach() / tau, dim=1)
            max_probs, targets_unlab = torch.max(pseudo_label, dim=1)
            mask = max_probs.ge(threshold).float()

            loss_labeled = F.cross_entropy(logits_lab, targets_lab, reduction='mean')
            loss_unlabeled = (F.cross_entropy(logits_strong, targets_unlab, reduction='none') * mask).mean()
            loss_total = loss_labeled + lamd * loss_unlabeled

            losses.update(loss_total.item())
            losses_l.update(loss_labeled.item())
            losses_u.update(loss_unlabeled.item())
            mask_probs.update(mask.mean().item())

            optimizer.zero_grad()
            loss_total.backward()
            optimizer.step()
            scheduler.step()

            if batch_idx % 25 == 0:
                print(f"Epoch number: {epoch}, loss: {losses.avg}, loss lab: {losses_l.avg}, "
                      f"loss unlab: {losses_u.avg}, mask: {mask_probs.avg}, "
                      f"loss_here: {loss_total.item()}, best accuracy: {best_val_accuracy:.2f}", flush=True)
                save_checkpoint({
                    'epoch': epoch + 1,
                    'model_state_dict': model.state_dict(),
                    'classifier_state_dict': classifier.state_dict(),
                    'optimizer': optimizer.state_dict(),
                    'scheduler': scheduler.state_dict(),
                }, checkpoint_path)

        # Validation at the end of every epoch.
        model.eval()
        classifier.eval()
        with torch.no_grad():
            val_loss = 0
            val_size = 0
            total = 0
            correct = 0
            for batch in val_loader:
                logits_val = classifier(model(batch[0].to(device)))
                labels = batch[1].to(device)
                val_loss += F.cross_entropy(logits_val, labels)
                _, predicted = torch.max(logits_val.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
                val_size += 1
        print(f"Val loss: {val_loss / val_size}, Accuracy: {(100 * correct / total):.2f}%", flush=True)

        if 100 * correct / total > best_val_accuracy:
            best_val_accuracy = 100 * correct / total
            best_val_loss = val_loss / val_size
            print(f"Saving the best model with {best_val_accuracy:.2f}% accuracy and "
                  f"{best_val_loss:.2f} loss", flush=True)
            save_checkpoint({
                'epoch': epoch + 1,
                'model_state_dict': model.state_dict(),
                'classifier_state_dict': classifier.state_dict(),
                'optimizer': optimizer.state_dict(),
                'scheduler': scheduler.state_dict(),
                'best_val_accuracy': best_val_accuracy,
                'best_val_loss': best_val_loss
            }, args.best_path)

        model.train()
        classifier.train()
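# The unlabeled-loss computation in the loop above can be isolated into a small helper. This is
# a minimal sketch grounded in that loop; the function name, signature, and default values are
# illustrative and not part of the original script.
import torch
import torch.nn.functional as F

def fixmatch_unlabeled_loss(logits_weak, logits_strong, temperature=0.95, threshold=0.5, lambda_u=1.0):
    """Confidence-masked cross-entropy between strong-view logits and weak-view pseudo-labels."""
    pseudo = torch.softmax(logits_weak.detach() / temperature, dim=1)   # soften weak-view logits
    max_probs, pseudo_targets = pseudo.max(dim=1)                        # hard pseudo-labels
    mask = max_probs.ge(threshold).float()                               # keep confident samples only
    per_sample = F.cross_entropy(logits_strong, pseudo_targets, reduction='none')
    return lambda_u * (per_sample * mask).mean()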
def infer(img_dir, classes_csv, model_fname, resnet_depth, score_thresh, out_dir, results_fname):
    # Collect every .png under the given directory (or list of directories).
    img_list = []
    if not isinstance(img_dir, list):
        img_dir = [img_dir]
    for dir in img_dir:
        for file in os.listdir(dir):
            if file.endswith(".png"):
                img_list.append(os.path.join(dir, file))

    dataset_val = CustomDataset(img_list=img_list, class_list=classes_csv,
                                transform=transforms.Compose([Normalizer(), Resizer()]))
    sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
    dataloader_val = DataLoader(dataset_val, num_workers=1, collate_fn=collater, batch_sampler=sampler_val)
    print(dataset_val.num_classes())

    # Create the model.
    if resnet_depth == 18:
        retinanet = model.resnet18(num_classes=dataset_val.num_classes())
    elif resnet_depth == 34:
        retinanet = model.resnet34(num_classes=dataset_val.num_classes())
    elif resnet_depth == 50:
        retinanet = model.resnet50(num_classes=dataset_val.num_classes())
    elif resnet_depth == 101:
        retinanet = model.resnet101(num_classes=dataset_val.num_classes())
    elif resnet_depth == 152:
        retinanet = model.resnet152(num_classes=dataset_val.num_classes())
    else:
        raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    # The checkpoint was saved from a DataParallel model, so strip the 'module.' prefix.
    state_dict = torch.load(model_fname)
    from collections import OrderedDict
    new_state_dict = OrderedDict()
    for k, v in state_dict.items():
        name = k[7:]  # remove `module.`
        new_state_dict[name] = v
    retinanet.load_state_dict(new_state_dict)

    if use_gpu:
        retinanet = retinanet.cuda()
    retinanet.eval()
    unnormalize = UnNormalizer()

    def draw_caption(image, box, caption):
        b = np.array(box).astype(int)
        cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (0, 0, 0), 2)
        cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 1)

    results = []
    for idx, data in enumerate(dataloader_val):
        with torch.no_grad():
            st = time.time()
            scores, classification, transformed_anchors = retinanet(data['img'].cuda().float())
            print('Elapsed time: {}, Num objects: {}'.format(time.time() - st, len(scores)))

            idxs = np.where(scores.cpu() > score_thresh)
            img = np.array(255 * unnormalize(data['img'][0, :, :, :])).copy()
            img[img < 0] = 0
            img[img > 255] = 255
            img = np.transpose(img, (1, 2, 0)).astype(np.uint8).copy()

            bboxes = []
            for j in range(idxs[0].shape[0]):
                # Rescale each box back to the original image resolution.
                bbox = transformed_anchors[idxs[0][j], :]
                x1 = int(bbox[0] / data['scale'][0])
                y1 = int(bbox[1] / data['scale'][0])
                x2 = int(bbox[2] / data['scale'][0])
                y2 = int(bbox[3] / data['scale'][0])
                label_name = dataset_val.labels[int(classification[idxs[0][j]])]
                draw_caption(img, (x1, y1, x2, y2), label_name)
                cv2.rectangle(img, (x1, y1), (x2, y2), color=(0, 0, 255), thickness=2)
                score = float(scores[idxs[0][j]])
                bboxes.append([x1, y1, x2, y2, score])

            img_fname = ntpath.basename(data['img_fname'][0])
            results.append([img_fname, bboxes])
            # fig, ax = plt.subplots(figsize=(12, 12))
            # ax.imshow(img, interpolation='bilinear')

    with open(os.path.join(out_dir, results_fname), "wb") as output_file:
        pickle.dump(results, output_file)
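# A small sketch of reading the pickle written above. The helper name is illustrative and not
# part of the original script; it only assumes the [img_fname, bboxes] layout used in `results`.
import pickle

def load_detections(results_path):
    """Return {image_filename: [[x1, y1, x2, y2, score], ...]} from the saved results list."""
    with open(results_path, "rb") as f:
        results = pickle.load(f)
    return {img_fname: bboxes for img_fname, bboxes in results}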
aug_transform3 = transforms.Compose([
    transforms.RandomResizedCrop((96, 96), scale=(0.08, 1.0), ratio=(0.75, 1.3333333333333333)),
    rnd_color_jitter3,
    rnd_gray,
    transforms.ToTensor(),
])
train_transform = transforms.Compose([
    transforms.ToTensor(),
])

trainset = CustomDataset(root='/dataset', split="train", transform=train_transform)
augset1 = CustomDataset(root='/dataset', split="train", transform=aug_transform1)
augset2 = CustomDataset(root='/dataset', split="train", transform=aug_transform2)
augset3 = CustomDataset(root='/dataset', split="train", transform=aug_transform3)

trainloader = torch.utils.data.DataLoader(trainset, batch_size=256, shuffle=True, num_workers=2)
augloader1 = torch.utils.data.DataLoader(augset1,
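# The fragment above refers to rnd_color_jitter3, rnd_gray, aug_transform1 and aug_transform2,
# which are defined earlier in the source file and not shown here. A minimal sketch of what
# SimCLR-style definitions of that kind typically look like; the jitter strengths and
# probabilities below are assumptions, not values from the original code.
import torchvision.transforms as transforms

rnd_color_jitter3 = transforms.RandomApply(
    [transforms.ColorJitter(brightness=0.8, contrast=0.8, saturation=0.8, hue=0.2)], p=0.8)
rnd_gray = transforms.RandomGrayscale(p=0.2)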
def main_worker(gpu, args):
    global best_acc1
    args.gpu = gpu

    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))

    # create model
    print("=> creating model '{}'".format(args.arch))
    model = models.__dict__[args.arch](num_classes=800, norm_layer=SubBatchNorm2d)
    # model = models.__dict__[args.arch](num_classes=800)

    # freeze all layers but the last fc
    for name, param in model.named_parameters():
        if name not in ['fc.weight', 'fc.bias']:
            param.requires_grad = False
    # init the fc layer
    model.fc.weight.data.normal_(mean=0.0, std=0.1)
    model.fc.bias.data.zero_()

    # load from pre-trained, before DistributedDataParallel constructor
    if args.pretrained:
        if os.path.isfile(args.pretrained):
            print("=> loading checkpoint '{}'".format(args.pretrained))
            checkpoint = torch.load(args.pretrained, map_location="cpu")

            # rename moco pre-trained keys
            state_dict = checkpoint['state_dict']
            for k in list(state_dict.keys()):
                # retain only encoder_q up to before the embedding layer
                if k.startswith('encoder_q') and not k.startswith('encoder_q.fc'):
                    # remove prefix
                    state_dict[k[len("encoder_q."):]] = state_dict[k]
                # delete renamed or unused k
                del state_dict[k]

            args.start_epoch = 0
            msg = model.load_state_dict(state_dict, strict=False)
            # print(f"=> loading state_dict: \n{list(state_dict.keys())}")
            # print(f"=> missing state keys: \n{msg.missing_keys}")
            assert set(msg.missing_keys) == {"fc.weight", "fc.bias"}
            print("=> loaded pre-trained model '{}'".format(args.pretrained))
        else:
            raise ValueError("=> no pre-trained model found at '{}'".format(args.pretrained))

    if args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        model = model.cuda(args.gpu)
    else:
        print("=> ERROR: gpu must be assigned")

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda(args.gpu)

    # optimize only the linear classifier
    parameters = list(filter(lambda p: p.requires_grad, model.parameters()))
    assert len(parameters) == 2  # fc.weight, fc.bias
    optimizer = torch.optim.SGD(parameters, args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    # optimizer = torch.optim.SGD(model.parameters(), args.lr,
    #                             momentum=args.momentum,
    #                             weight_decay=args.weight_decay)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            if args.gpu is None:
                checkpoint = torch.load(args.resume)
            else:
                # Map model to be loaded to specified single gpu.
                loc = 'cuda:{}'.format(args.gpu)
                checkpoint = torch.load(args.resume, map_location=loc)
            args.start_epoch = checkpoint['epoch']
            best_acc1 = checkpoint['best_acc1']
            if args.gpu is not None:
                # best_acc1 may be from a checkpoint from a different GPU
                best_acc1 = best_acc1.to(args.gpu)
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch']))
            del checkpoint
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code
    traindir = args.data
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    transform_train = transforms.Compose([
        transforms.RandomResizedCrop(96),  # add crop resize
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize
    ])
    transform_eval = transforms.Compose([
        transforms.Resize(128),  # add resize
        transforms.CenterCrop(96),  # add crop
        transforms.ToTensor(),
        normalize
    ])
    train_dataset = CustomDataset(traindir, 'train', transform_train)
    eval_dataset = CustomDataset(traindir, 'val', transform_eval)

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)
    eval_loader = torch.utils.data.DataLoader(eval_dataset,
                                              batch_size=args.batch_size,
                                              shuffle=True,
                                              num_workers=args.workers,
                                              pin_memory=True)

    # training code
    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, args)

        if epoch == 0 or (epoch + 1) % args.eval_per_n_epoch == 0:
            accuracy = evaluate(eval_loader, model, args)
            print(f"=> Epoch: {epoch+1}, accuracy: {accuracy:.4f}")

            # remember best acc and save checkpoint
            is_best = accuracy > best_acc1
            best_acc1 = max(accuracy, best_acc1)
            print(f"=> Epoch: {epoch+1}, isBest? : {is_best}")

            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'accuracy': accuracy,
                    'state_dict': model.state_dict(),
                    'optimizer': optimizer.state_dict(),
                },
                is_best,
                save_dir=args.checkpoint_dir,
                epoch=(epoch + 1),
                filename=os.path.join(args.checkpoint_dir,
                                      'checkpoint_{:03d}.pth.tar'.format(epoch + 1)))
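# The launcher for main_worker is not shown here. Workers with this (gpu, args) signature are
# commonly started either directly or via torch.multiprocessing.spawn; a minimal sketch, assuming
# an `args` namespace produced by the script's argument parser (the `multi_gpu` flag below is a
# hypothetical example, not an option from the original script):
import torch
import torch.multiprocessing as mp

def launch(args):
    if getattr(args, 'multi_gpu', False):
        # spawn one worker per visible GPU; each receives its GPU index as the first argument
        mp.spawn(main_worker, args=(args,), nprocs=torch.cuda.device_count())
    else:
        main_worker(args.gpu, args)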