def main():
    # See all possible arguments in src/transformers/training_args.py
    # or by passing the --help flag to this script.
    # We now keep distinct sets of args, for a cleaner separation of concerns.
    parser = HfArgumentParser((ModelArguments, DataTrainingArguments, TrainingArguments))
    if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
        # If we pass only one argument to the script and it's the path to a json file,
        # let's parse it to get our arguments.
        model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1]))
    else:
        model_args, data_args, training_args = parser.parse_args_into_dataclasses()

    # Setup logging
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        handlers=[logging.StreamHandler(sys.stdout)],
    )
    log_level = training_args.get_process_log_level()
    logger.setLevel(log_level)
    transformers.utils.logging.set_verbosity(log_level)
    transformers.utils.logging.enable_default_handler()
    transformers.utils.logging.enable_explicit_format()

    # Log a small summary on each process:
    logger.warning(
        f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}, "
        f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}"
    )
    logger.info(f"Training/evaluation parameters {training_args}")

    # Detect the last checkpoint.
    last_checkpoint = None
    if os.path.isdir(training_args.output_dir) and training_args.do_train and not training_args.overwrite_output_dir:
        last_checkpoint = get_last_checkpoint(training_args.output_dir)
        if last_checkpoint is None and len(os.listdir(training_args.output_dir)) > 0:
            raise ValueError(
                f"Output directory ({training_args.output_dir}) already exists and is not empty. "
                "Use --overwrite_output_dir to overcome.")
        elif last_checkpoint is not None and training_args.resume_from_checkpoint is None:
            logger.info(
                f"Checkpoint detected, resuming training at {last_checkpoint}. To avoid this behavior, change "
                "the `--output_dir` or add `--overwrite_output_dir` to train from scratch.")

    # Initialize our dataset and prepare it for the 'image-classification' task.
    if data_args.dataset_name is not None:
        dataset = load_dataset(
            data_args.dataset_name,
            data_args.dataset_config_name,
            cache_dir=model_args.cache_dir,
            task="image-classification",
            use_auth_token=True if model_args.use_auth_token else None,
        )
    else:
        data_files = {}
        if data_args.train_dir is not None:
            data_files["train"] = os.path.join(data_args.train_dir, "**")
        if data_args.validation_dir is not None:
            data_files["validation"] = os.path.join(data_args.validation_dir, "**")
        dataset = load_dataset(
            "imagefolder",
            data_files=data_files,
            cache_dir=model_args.cache_dir,
            task="image-classification",
        )

    # If we don't have a validation split, split off a percentage of train as validation.
    data_args.train_val_split = None if "validation" in dataset.keys() else data_args.train_val_split
    if isinstance(data_args.train_val_split, float) and data_args.train_val_split > 0.0:
        split = dataset["train"].train_test_split(data_args.train_val_split)
        dataset["train"] = split["train"]
        dataset["validation"] = split["test"]

    # Prepare label mappings.
    # We'll include these in the model's config to get human readable labels in the Inference API.
    labels = dataset["train"].features["labels"].names
    label2id, id2label = dict(), dict()
    for i, label in enumerate(labels):
        label2id[label] = str(i)
        id2label[str(i)] = label

    # Load the accuracy metric from the datasets package
    metric = datasets.load_metric("accuracy")

    # Define our compute_metrics function. It takes an `EvalPrediction` object (a namedtuple with
    # `predictions` and `label_ids` fields) and has to return a dictionary mapping strings to floats.
    def compute_metrics(p):
        """Computes accuracy on a batch of predictions"""
        return metric.compute(predictions=np.argmax(p.predictions, axis=1), references=p.label_ids)

    config = AutoConfig.from_pretrained(
        model_args.config_name or model_args.model_name_or_path,
        num_labels=len(labels),
        label2id=label2id,
        id2label=id2label,
        finetuning_task="image-classification",
        cache_dir=model_args.cache_dir,
        revision=model_args.model_revision,
        use_auth_token=True if model_args.use_auth_token else None,
    )
    model = AutoModelForImageClassification.from_pretrained(
        model_args.model_name_or_path,
        from_tf=bool(".ckpt" in model_args.model_name_or_path),
        config=config,
        cache_dir=model_args.cache_dir,
        revision=model_args.model_revision,
        use_auth_token=True if model_args.use_auth_token else None,
        ignore_mismatched_sizes=model_args.ignore_mismatched_sizes,
    )
    feature_extractor = AutoFeatureExtractor.from_pretrained(
        model_args.feature_extractor_name or model_args.model_name_or_path,
        cache_dir=model_args.cache_dir,
        revision=model_args.model_revision,
        use_auth_token=True if model_args.use_auth_token else None,
    )

    # Define torchvision transforms to be applied to each image.
    normalize = Normalize(mean=feature_extractor.image_mean, std=feature_extractor.image_std)
    _train_transforms = Compose([
        RandomResizedCrop(feature_extractor.size),
        RandomHorizontalFlip(),
        ToTensor(),
        normalize,
    ])
    _val_transforms = Compose([
        Resize(feature_extractor.size),
        CenterCrop(feature_extractor.size),
        ToTensor(),
        normalize,
    ])

    def train_transforms(example_batch):
        """Apply _train_transforms across a batch."""
        example_batch["pixel_values"] = [
            _train_transforms(pil_img.convert("RGB")) for pil_img in example_batch["image"]
        ]
        return example_batch

    def val_transforms(example_batch):
        """Apply _val_transforms across a batch."""
        example_batch["pixel_values"] = [
            _val_transforms(pil_img.convert("RGB")) for pil_img in example_batch["image"]
        ]
        return example_batch

    if training_args.do_train:
        if "train" not in dataset:
            raise ValueError("--do_train requires a train dataset")
        if data_args.max_train_samples is not None:
            dataset["train"] = (
                dataset["train"].shuffle(seed=training_args.seed).select(range(data_args.max_train_samples))
            )
        # Set the training transforms
        dataset["train"].set_transform(train_transforms)
    if training_args.do_eval:
        if "validation" not in dataset:
            raise ValueError("--do_eval requires a validation dataset")
        if data_args.max_eval_samples is not None:
            dataset["validation"] = (
                dataset["validation"].shuffle(seed=training_args.seed).select(range(data_args.max_eval_samples))
            )
        # Set the validation transforms
        dataset["validation"].set_transform(val_transforms)

    # Initialize our trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=dataset["train"] if training_args.do_train else None,
        eval_dataset=dataset["validation"] if training_args.do_eval else None,
        compute_metrics=compute_metrics,
        tokenizer=feature_extractor,
        data_collator=collate_fn,
    )

    # Training
    if training_args.do_train:
        checkpoint = None
        if training_args.resume_from_checkpoint is not None:
            checkpoint = training_args.resume_from_checkpoint
        elif last_checkpoint is not None:
            checkpoint = last_checkpoint
        train_result = trainer.train(resume_from_checkpoint=checkpoint)
        trainer.save_model()
        trainer.log_metrics("train", train_result.metrics)
        trainer.save_metrics("train", train_result.metrics)
        trainer.save_state()

    # Evaluation
    if training_args.do_eval:
        metrics = trainer.evaluate()
        trainer.log_metrics("eval", metrics)
        trainer.save_metrics("eval", metrics)

    # Write model card and (optionally) push to hub
    kwargs = {
        "finetuned_from": model_args.model_name_or_path,
        "tasks": "image-classification",
        "dataset": data_args.dataset_name,
        "tags": ["image-classification", "vision"],
    }
    if training_args.push_to_hub:
        trainer.push_to_hub(**kwargs)
    else:
        trainer.create_model_card(**kwargs)
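# The Trainer above is handed a `collate_fn` that this excerpt never defines.
# A minimal sketch consistent with how train_transforms/val_transforms populate
# "pixel_values" (and matching the collate_fn in the no_trainer variant later
# in this section):
import torch

def collate_fn(examples):
    # Stack per-example pixel tensors and gather integer labels into one batch dict.
    pixel_values = torch.stack([example["pixel_values"] for example in examples])
    labels = torch.tensor([example["labels"] for example in examples])
    return {"pixel_values": pixel_values, "labels": labels}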
def target_transform(crop_size): return Compose([ CenterCrop(crop_size), ToTensor(), ])
LeakyReLU(0.2, inplace=True), Conv2d(CONFIG["NDF"] * 8, 1, kernel_size=4, stride=1, padding=0, bias=False), Sigmoid()) def forward(self, input): return self.mainNetwork(input).view(-1) transforms = Compose([ Resize(CONFIG["IMAGE_SIZE"]), CenterCrop(CONFIG["IMAGE_SIZE"]), ToTensor(), Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)) ]) dataset = ImageFolder('/input/AnimateFace', transform=transforms) dataLoader = DataLoader(dataset=dataset, batch_size=CONFIG["BATCH_SIZE"], shuffle=True, drop_last=True) netG, netD = DataParallel(GeneratorNet()), DataParallel(DiscriminatorNet()) map_location = lambda storage, loc: storage optimizer_generator = Adam(netG.parameters(), 2e-4, betas=(0.5, 0.999)) optimizer_discriminator = Adam(netD.parameters(), 2e-4, betas=(0.5, 0.999)) criterion = BCELoss()
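# A minimal sketch of the alternating DCGAN update that netG/netD, the two Adam
# optimizers, and the BCELoss above are set up for. CONFIG["NZ"] (latent size)
# and device placement are assumptions; the original training loop is not shown.
import torch

for images, _ in dataLoader:
    # Discriminator step: real images labeled 1, generated fakes labeled 0.
    netD.zero_grad()
    out_real = netD(images)
    loss_real = criterion(out_real, torch.ones_like(out_real))
    noise = torch.randn(CONFIG["BATCH_SIZE"], CONFIG["NZ"], 1, 1)  # NZ assumed
    fakes = netG(noise)
    out_fake = netD(fakes.detach())  # detach so G receives no gradient here
    loss_fake = criterion(out_fake, torch.zeros_like(out_fake))
    (loss_real + loss_fake).backward()
    optimizer_discriminator.step()
    # Generator step: try to make D label the fakes as real.
    netG.zero_grad()
    out = netD(fakes)
    loss_g = criterion(out, torch.ones_like(out))
    loss_g.backward()
    optimizer_generator.step()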
def get_generator(args):
    if args.dataset.startswith('mnist'):
        cluttered = args.dataset.endswith('cluttered')
        dataset_train = MNISTMulti('.', n_digits=args.n_digits, backrand=args.backrand,
                                   cluttered=cluttered, image_rows=args.row, image_cols=args.col,
                                   download=True, size_min=args.size_min, size_max=args.size_max)
        dataset_valid = MNISTMulti('.', n_digits=args.n_digits, backrand=args.backrand,
                                   cluttered=cluttered, image_rows=args.row, image_cols=args.col,
                                   download=False, mode='valid', size_min=args.size_min, size_max=args.size_max)
        dataset_test = MNISTMulti('.', n_digits=args.n_digits, backrand=args.backrand,
                                  cluttered=cluttered, image_rows=args.row, image_cols=args.col,
                                  download=False, mode='test', size_min=args.size_min, size_max=args.size_max)
        train_sampler = valid_sampler = test_sampler = None
        loader_train = data_generator_mnistmulti(dataset_train, args.batch_size, shuffle=True)
        loader_valid = data_generator_mnistmulti(dataset_valid, args.v_batch_size, shuffle=False)
        loader_test = data_generator_mnistmulti(dataset_test, args.v_batch_size, shuffle=False)
        preprocessor = preprocess_mnistmulti
    elif args.dataset == 'cifar10':
        # TODO: test set
        # transform_train = Compose([
        #     RandomCrop(32, padding=4),
        #     RandomHorizontalFlip(),
        #     ToTensor(),
        # ])
        # transform_test = Compose([
        #     ToTensor(),
        # ])
        dataset_train = torchvision.datasets.CIFAR10('.', download=True, transform=ToTensor())
        dataset_valid = torchvision.datasets.CIFAR10('.', download=True, transform=ToTensor())
        train_sampler = SubsetRandomSampler(range(0, 45000))
        valid_sampler = SubsetSampler(range(45000, 50000))
        loader_train = data_generator_cifar10(dataset_train, args.batch_size, sampler=train_sampler)
        loader_valid = data_generator_cifar10(dataset_valid, args.v_batch_size, sampler=valid_sampler)
        loader_test = None
        preprocessor = preprocess_cifar10
        args.row = args.col = 32
    elif args.dataset == 'bird':
        transform_train = Compose([
            ToPILImage(),
            RandomCrop(448),
            RandomHorizontalFlip(),
            ToTensor(),
        ])
        transform_test = Compose([
            ToPILImage(),
            CenterCrop(448),
            ToTensor(),
        ])
        dataset_train = BirdSingle('train', transform=transform_train)
        dataset_test = BirdSingle('test', transform=transform_test)
        train_sampler = SubsetRandomSampler(range(0, 3000))
        # valid_sampler = SubsetSampler(range(2700, 3000))
        test_sampler = SubsetSampler(range(0, 3033))
        loader_train = data_generator_bird(dataset_train, args.batch_size, sampler=train_sampler)
        # loader_valid = data_generator_bird(dataset_train, args.batch_size, sampler=valid_sampler)
        loader_test = data_generator_bird(dataset_test, args.v_batch_size, sampler=test_sampler)
        loader_valid = loader_test
        preprocessor = preprocess_bird
    elif args.dataset == 'flower':
        dataset_train = FlowerSingle('train')
        dataset_valid = FlowerSingle('valid')
        dataset_test = FlowerSingle('test')
        loader_train = data_generator_flower(dataset_train, args.batch_size, shuffle=True)
        loader_valid = data_generator_flower(dataset_valid, args.v_batch_size, shuffle=False)
        loader_test = data_generator_flower(dataset_test, args.v_batch_size, shuffle=False)
        preprocessor = preprocess_flower
    elif args.dataset in ['imagenet', 'dogs']:
        # TODO: test set
        dataset_train = ImageNetSingle(args.imagenet_root, args.imagenet_train_sel, args.batch_size)
        dataset_valid = ImageNetSingle(args.imagenet_root, args.imagenet_valid_sel, args.v_batch_size)
        train_sampler = ImageNetBatchSampler(dataset_train)
        valid_sampler = ImageNetBatchSampler(dataset_valid)
        loader_train = data_generator_imagenet(dataset_train, args.batch_size, num_workers=args.num_workers)
        loader_valid = data_generator_imagenet(dataset_valid, args.batch_size, num_workers=args.num_workers)
        loader_test = None
        preprocessor = preprocess_imagenet
    return loader_train, loader_valid, loader_test, preprocessor
def generator_loss(netsD, image_encoder, fake_imgs, real_labels, words_embs, sent_emb,
                   match_labels, cap_lens, class_ids, model, sent_emb_damsm, sent_emb_clip):
    numDs = len(netsD)
    batch_size = real_labels.size(0)
    logs = ''
    # Forward
    errG_total = 0
    for i in range(numDs):
        features = netsD[i](fake_imgs[i])
        cond_logits = netsD[i].COND_DNET(features, sent_emb)
        cond_errG = nn.BCELoss()(cond_logits, real_labels)
        if netsD[i].UNCOND_DNET is not None:
            logits = netsD[i].UNCOND_DNET(features)
            errG = nn.BCELoss()(logits, real_labels)
            g_loss = errG + cond_errG
        else:
            g_loss = cond_errG
        errG_total += g_loss
        # err_img = errG_total.data[0]
        logs += 'g_loss%d: %.2f ' % (i, g_loss.item())

        # Ranking loss, computed on the highest-resolution stage only.
        if i == (numDs - 1):
            # words_features: batch_size x nef x 17 x 17
            # sent_code: batch_size x nef
            # new: rename. Shapes: fake_imgs[i] is (B, 3, 256, 256),
            # cnn_code_damsm is (B, 512), region_features_damsm is (B, 512, 17, 17).
            region_features_damsm, cnn_code_damsm = image_encoder(fake_imgs[i])
            w_loss0, w_loss1, _ = words_loss(region_features_damsm, words_embs, match_labels,
                                             cap_lens, class_ids, batch_size)
            w_loss = (w_loss0 + w_loss1) * cfg.TRAIN.SMOOTH.LAMBDA
            # err_words = err_words + w_loss.data[0]

            # new: use CLIP ImageEncoder for global image features (cnn_code)
            if cfg.TRAIN.CLIP_LOSS:
                # model = torch.jit.load("model.pt").cuda().eval()
                input_resolution = model.input_resolution.item()  # 224
                preprocess = Compose([
                    Resize(input_resolution, interpolation=Image.BICUBIC),
                    CenterCrop(input_resolution),
                    ToTensor()
                ])
                images = []
                for j in range(fake_imgs[i].shape[0]):
                    image = fake_imgs[i][j].cpu().clone()
                    image = image.squeeze(0)
                    unloader = transforms.ToPILImage()
                    image = unloader(image)
                    image = preprocess(image.convert("RGB"))  # 256*256 -> 224*224
                    images.append(image)
                image_mean = torch.tensor([0.48145466, 0.4578275, 0.40821073]).cuda()
                image_std = torch.tensor([0.26862954, 0.26130258, 0.27577711]).cuda()
                image_input = torch.tensor(np.stack(images)).cuda()  # (B, 3, 224, 224)
                image_input -= image_mean[:, None, None]
                image_input /= image_std[:, None, None]
                with torch.no_grad():
                    cnn_code_clip = model.encode_image(image_input).float()  # (B, 512)

                # new: add additional damsm sent loss
                if cfg.TRAIN.EXTRA_LOSS:
                    weight = cfg.TRAIN.WEIGHT_DAMSM_LOSS
                    s_loss0_damsm, s_loss1_damsm = sent_loss(cnn_code_damsm, sent_emb_damsm,
                                                             match_labels, class_ids, batch_size)
                    s_loss0_clip, s_loss1_clip = sent_loss(cnn_code_clip, sent_emb_clip,
                                                           match_labels, class_ids, batch_size)
                    s_loss0 = weight * s_loss0_damsm + (1 - weight) * s_loss0_clip
                    s_loss1 = weight * s_loss1_damsm + (1 - weight) * s_loss1_clip
                else:
                    s_loss0, s_loss1 = sent_loss(cnn_code_clip, sent_emb_clip,
                                                 match_labels, class_ids, batch_size)
            else:
                if cfg.TRAIN.CLIP_SENTENCODER:
                    print("SOS, please check code")  # ERROR: cannot use the CLIP text encoder alone
                    sys.exit()
                else:
                    s_loss0, s_loss1 = sent_loss(cnn_code_damsm, sent_emb_damsm,
                                                 match_labels, class_ids, batch_size)
            s_loss = (s_loss0 + s_loss1) * cfg.TRAIN.SMOOTH.LAMBDA
            # err_sent = err_sent + s_loss.data[0]
            errG_total += w_loss + s_loss
            logs += 'w_loss: %.2f s_loss: %.2f ' % (w_loss.item(), s_loss.item())
    return errG_total, logs
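# The CLIP branch above round-trips every fake image through PIL one at a time.
# A hedged, tensor-only equivalent for the whole batch (assuming fake_imgs holds
# (B, 3, H, W) tensors in [0, 1], as ToPILImage above also assumes); the
# interpolation differs slightly from PIL's bicubic Resize + CenterCrop.
import torch
import torch.nn.functional as F

def clip_preprocess_batch(imgs, resolution=224):
    # Resize the whole batch to CLIP's input resolution in one call.
    x = F.interpolate(imgs, size=(resolution, resolution), mode="bicubic", align_corners=False)
    mean = torch.tensor([0.48145466, 0.4578275, 0.40821073], device=x.device)
    std = torch.tensor([0.26862954, 0.26130258, 0.27577711], device=x.device)
    return (x - mean[:, None, None]) / std[:, None, None]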
def get_transforms(transforms_list, width, height, is_train):
    transforms = []
    for transform in transforms_list:
        if transform == 'random_resized_crop':
            scale = (0.8, 1.2) if is_train else (1.0, 1.0)
            ratio = (1.0, 1.0)
            transforms.append(RandomResizedCrop((width, height), scale=scale, ratio=ratio))
        elif transform == 'center_crop':
            transforms.append(CenterCrop((700, 700)))
        elif transform == 'resize':
            transforms.append(Resize((width, height)))
        elif transform == 'crop_black':
            # crop_black must come first in the transforms list.
            transforms.append(CropBlack(1.0))
        elif transform == 'random_rotate':
            p = 0.5 if is_train else 0.25
            transforms.append(RandomRotate(p))
        elif transform == 'random_vertical_flip':
            p = 0.5 if is_train else 0.25
            transforms.append(RandomVerticalFlip(p))
        elif transform == 'random_horizontal_flip':
            p = 0.5 if is_train else 0.25
            transforms.append(RandomHorizontalFlip(p))
        elif transform == 'random_color_jitter':
            brightness = 0.1 if is_train else 0.0
            contrast = 0.1 if is_train else 0.0
            transforms.append(ColorJitter(brightness=brightness, contrast=contrast, saturation=0, hue=0))
        elif transform == 'random_grayscale':
            p = 0.5 if is_train else 0.25
            transforms.append(RandomGrayscale(p))
        elif transform == 'ben_graham':
            transforms.append(BenGrahamAug(1))
        elif transform == 'imagenet_policy':
            transforms.append(ImageNetPolicy())
        elif transform == 'cifar_policy':
            transforms.append(CIFAR10Policy())
        elif transform == 'svhn_policy':
            transforms.append(SVHNPolicy())
        else:
            print(transform)
            raise NotImplementedError
    return transforms
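# Hedged usage of get_transforms(): compose the returned list into a single
# callable pipeline. The transform names come from the branches above; the
# sizes are illustrative.
from torchvision.transforms import Compose

train_tf = Compose(get_transforms(['resize', 'random_horizontal_flip'], 224, 224, is_train=True))
val_tf = Compose(get_transforms(['resize'], 224, 224, is_train=False))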
def train(args): writer = SummaryWriter(log_dir=args.logdir) # Datasets dataset_tr = CUBDataset(root=args.datapath, train=True, transforms=Compose([ Resize(256), RandomCrop((224, 224), pad_if_needed=True), RandomHorizontalFlip(), ToTensor() ])) data_loader_tr = DataLoader(dataset_tr, batch_size=args.batch_size, shuffle=True, num_workers=args.number_workers) dataset_val = CUBDataset(root=args.datapath, train=False, transforms=Compose([CenterCrop(224), ToTensor()])) data_loader_val = DataLoader(dataset_val, batch_size=args.batch_size, shuffle=True, num_workers=args.number_workers) # Model model = BirdNet(num_classes=dataset_tr.number_classes).to(args.device) # Optimizer optimizer = Adam( params=model.classifier.parameters( ), # Optimize only the classifier layer lr=args.learning_rate, weight_decay=args.weight_decay) # Meters meter_loss = AverageMeter() meter_accuracy = AverageMeter() train_accuracy, train_loss, val_accuracy, val_loss = 0, 0, 0, 0 epoch_bar = tqdm.trange(args.number_epochs, desc='Epoch') for epoch in epoch_bar: epoch_start_time = time() # Training model.train() torch.set_grad_enabled(True) batch_bar = tqdm.tqdm(data_loader_tr, desc='Batch') meter_loss.reset() meter_accuracy.reset() for batch in batch_bar: input_batch = batch[0].to(args.device) target = batch[1].to(args.device) logits = model(input_batch) number_samples = target.shape[0] predictions = logits.argmax(dim=1) accuracy = (predictions == target).float().sum() / number_samples loss = F.cross_entropy(logits, target) meter_accuracy.update(accuracy, number_samples) meter_loss.update(loss, number_samples) optimizer.zero_grad() loss.backward() optimizer.step() # batch_bar.set_postfix({'loss': loss.item()}) train_accuracy, train_loss = meter_accuracy.get_average( ), meter_loss.get_average() epoch_bar.set_postfix({"loss": train_loss, "accuracy": train_accuracy}) writer.add_scalar("/train/loss", train_loss, epoch) writer.add_scalar("/train/accuracy", train_accuracy, epoch) # Validation model.eval() torch.set_grad_enabled(False) batch_bar = tqdm.tqdm(data_loader_val, desc='Batch') meter_loss.reset() meter_accuracy.reset() for batch in batch_bar: input_batch = batch[0].to(args.device) target = batch[1].to(args.device) logits = model(input_batch) number_samples = target.shape[0] predictions = logits.argmax(dim=1) accuracy = (predictions == target).float().sum() / number_samples loss = F.cross_entropy(logits, target) meter_accuracy.update(accuracy, number_samples) meter_loss.update(loss, number_samples) val_accuracy, val_loss = meter_accuracy.get_average( ), meter_loss.get_average() epoch_time = time() - epoch_start_time epoch_bar.set_postfix({"loss": val_loss, "accuracy": val_accuracy}) writer.add_scalar("/validation/loss", val_loss, epoch) writer.add_scalar("/validation/accuracy", val_accuracy, epoch) writer.add_scalar("time_per_epoch", epoch_time, epoch) torch.save(model.classifier.state_dict(), str(args.logdir / "final_model.pt")) return { "train": { "accuracy": train_accuracy, "loss": train_loss }, "validation": { "accuracy": val_accuracy, "loss": val_loss } }
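# AverageMeter and its get_average() are not defined in this excerpt; a minimal
# sketch consistent with the update()/reset()/get_average() calls in train() above.
class AverageMeter:
    def __init__(self):
        self.reset()

    def reset(self):
        self.total, self.count = 0.0, 0

    def update(self, value, n=1):
        # `value` may be a 0-d tensor; float() unwraps it.
        self.total += float(value) * n
        self.count += n

    def get_average(self):
        return self.total / max(self.count, 1)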
cv2.imshow("test", cv_image) cv2.waitKey(-1) def showTensorImage(tensor_image): pil_image = transforms.ToPILImage()(tensor_image).convert('RGB') showImage(pil_image) if __name__ == "__main__": from torchvision.transforms import Compose, CenterCrop, Normalize from torchvision.transforms import ToTensor, ToPILImage from piwise.transform import Relabel, ToLabel, Colorize image_transform = ToPILImage() input_transform = Compose([ CenterCrop(30), ToTensor(), #Normalize([.485, .456, .406], [.229, .224, .225]), ]) target_transform = Compose([ CenterCrop(30), ToLabel(), #Relabel(255, 21), ]) dataset = VOC12("/data_1/data/VOC2012/VOCdevkit/VOC2012", input_transform, target_transform) for image, label in dataset: print(label) #showTensorImage(image)
def display_transform(): return Compose([ToPILImage(), Resize(448), CenterCrop(448), ToTensor()])
def input_transform(crop_size, upscale_factor): return Compose([ CenterCrop(crop_size), Resize(crop_size // upscale_factor, interpolation=Image.BICUBIC) ])
def target_transform(crop_size): return Compose([CenterCrop(crop_size)])
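# Hedged usage of the two helpers above to build a low-res/high-res training
# pair for super-resolution; the image path is illustrative. Note that these
# particular versions return PIL images (no ToTensor in either pipeline).
from PIL import Image

crop_size, upscale_factor = 256, 4
lr_transform = input_transform(crop_size, upscale_factor)  # 256 -> 64 low-res input
hr_transform = target_transform(crop_size)                 # 256x256 high-res target
img = Image.open("example.png").convert("RGB")             # illustrative path
lr, hr = lr_transform(img), hr_transform(img)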
def test_train():
    # This function walks through how the `train` tool parses its config and runs training.
    global weight
    from robosat.robosat.tools.train import train
    from robosat.robosat.tools.train import validate
    from robosat.robosat.config import load_config
    from robosat.robosat.unet import UNet
    from torch.nn import DataParallel
    from robosat.robosat.losses import CrossEntropyLoss2d, mIoULoss2d, FocalLoss2d, LovaszLoss2d
    import collections
    from robosat.robosat.log import Log

    args = parse_default()
    print(args)
    model = load_config(args.model)
    dataset = load_config(args.dataset)
    print(dataset)
    workers = args.workers
    print(model)
    device = torch.device("cuda" if model["common"]["cuda"] else "cpu")
    print("device", device)
    if model["common"]["cuda"] and not torch.cuda.is_available():
        sys.exit("Error: CUDA requested but not available")

    # Create the checkpoint directory under the project root.
    os.makedirs(model["common"]["checkpoint"], exist_ok=True)
    num_classes = len(dataset["common"]["classes"])
    print("num_classes", num_classes)

    #####################################################
    # Load the UNet model. The ResNet backbone is downloaded automatically,
    # e.g. to C:\Users\Administrator/.cache\torch\checkpoints\resnet50-19c8e357.pth
    net = UNet(num_classes)
    net = DataParallel(net)
    net = net.to(device)
    print(net)
    if model["common"]["cuda"]:
        torch.backends.cudnn.benchmark = True

    ##################################################
    # Set up training parameters.
    # The "CrossEntropy", "mIoU" and "Focal" loss functions require class weights.
    try:
        weight = torch.Tensor(dataset["weights"]["values"])
    except KeyError:
        if model["opt"]["loss"] in ("CrossEntropy", "mIoU", "Focal"):
            sys.exit("Error: the selected loss function requires dataset weight values")
    optimizer = Adam(net.parameters(), lr=model["opt"]["lr"])
    resume = 0
    if args.checkpoint:
        # Checkpoint resumption is not exercised here; the default is False,
        # so this branch is skipped.
        pass
    if model["opt"]["loss"] == "CrossEntropy":
        criterion = CrossEntropyLoss2d(weight=weight).to(device)
    elif model["opt"]["loss"] == "mIoU":
        criterion = mIoULoss2d(weight=weight).to(device)
    elif model["opt"]["loss"] == "Focal":
        criterion = FocalLoss2d(weight=weight).to(device)
    elif model["opt"]["loss"] == "Lovasz":
        criterion = LovaszLoss2d().to(device)
    else:
        sys.exit("Error: Unknown [opt][loss] value !")

    #####################################################################
    # Load the datasets.
    target_size = (model["common"]["image_size"],) * 2
    print("target_size", target_size)
    batch_size = model["common"]["batch_size"]
    print("batch_size", batch_size)
    # Path to the dataset.
    path = dataset["common"]["dataset"]
    print("path", path)
    mean, std = [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]
    from robosat.robosat.transforms import (
        JointCompose,
        JointTransform,
        JointRandomHorizontalFlip,
        JointRandomRotation,
        ConvertImageMode,
        ImageToTensor,
        MaskToTensor,
    )
    from torchvision.transforms import Resize, CenterCrop, Normalize
    transform = JointCompose([
        JointTransform(ConvertImageMode("RGB"), ConvertImageMode("P")),
        JointTransform(Resize(target_size, Image.BILINEAR), Resize(target_size, Image.NEAREST)),
        JointTransform(CenterCrop(target_size), CenterCrop(target_size)),
        JointRandomHorizontalFlip(0.5),
        JointRandomRotation(0.5, 90),
        JointRandomRotation(0.5, 90),
        JointRandomRotation(0.5, 90),
        JointTransform(ImageToTensor(), MaskToTensor()),
        JointTransform(Normalize(mean=mean, std=std), None),
    ])
    from robosat.robosat.datasets import SlippyMapTilesConcatenation
    train_dataset = SlippyMapTilesConcatenation(
        [os.path.join(path, "training", "images")],
        os.path.join(path, "training", "labels"), transform)
    val_dataset = SlippyMapTilesConcatenation(
        [os.path.join(path, "validation", "images")],
        os.path.join(path, "validation", "labels"), transform)
    print("len train_dataset:", len(train_dataset))
    print("len val_dataset:", len(val_dataset))
    assert len(train_dataset) > 0, "at least one tile in training dataset"
    assert len(val_dataset) > 0, "at least one tile in validation dataset"
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True,
                              drop_last=True, num_workers=workers)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False,
                            drop_last=True, num_workers=workers)

    ############################################
    # Log the training hyperparameters.
    num_epochs = model["opt"]["epochs"]
    if resume >= num_epochs:
        sys.exit("Error: Epoch {} set in {} already reached by the checkpoint provided"
                 .format(num_epochs, args.model))
    history = collections.defaultdict(list)
    log = Log(os.path.join(model["common"]["checkpoint"], "log"))
    log.log("--- Hyper Parameters on Dataset: {} ---".format(dataset["common"]["dataset"]))
    log.log("Batch Size:\t {}".format(model["common"]["batch_size"]))
    log.log("Image Size:\t {}".format(model["common"]["image_size"]))
    log.log("Learning Rate:\t {}".format(model["opt"]["lr"]))
    log.log("Loss function:\t {}".format(model["opt"]["loss"]))
    if "weight" in locals():
        log.log("Weights :\t {}".format(dataset["weights"]["values"]))
    log.log("---")

    ##########################################################
    # Training loop.
    for epoch in range(resume, num_epochs):
        log.log("Epoch: {}/{}".format(epoch + 1, num_epochs))
        train_hist = train(train_loader, num_classes, device, net, optimizer, criterion)
        log.log("Train loss: {:.4f}, mIoU: {:.3f}, {} IoU: {:.3f}, MCC: {:.3f}".format(
            train_hist["loss"],
            train_hist["miou"],
            dataset["common"]["classes"][1],
            train_hist["fg_iou"],
            train_hist["mcc"],
        ))
        for k, v in train_hist.items():
            history["train " + k].append(v)
        val_hist = validate(val_loader, num_classes, device, net, criterion)
        log.log("Validate loss: {:.4f}, mIoU: {:.3f}, {} IoU: {:.3f}, MCC: {:.3f}".format(
            val_hist["loss"], val_hist["miou"], dataset["common"]["classes"][1],
            val_hist["fg_iou"], val_hist["mcc"]))
        if (epoch + 1) % 10 == 0:
            for k, v in val_hist.items():
                history["val " + k].append(v)
            visual = "history-{:05d}-of-{:05d}.png".format(epoch + 1, num_epochs)
            plot(os.path.join(model["common"]["checkpoint"], visual), history)
            checkpoint = "checkpoint-{:05d}-of-{:05d}.pth".format(epoch + 1, num_epochs)
            states = {
                "epoch": epoch + 1,
                "state_dict": net.state_dict(),
                "optimizer": optimizer.state_dict()
            }
            torch.save(states, os.path.join(model["common"]["checkpoint"], checkpoint))
def __init__(self, Datasets_params, mode, transform=None, input_transform=None,
             target_transform=None, randstep=5, rand=None, in_type=None):
    super(DAVIS2017_loader, self).__init__()
    self.iter_mode = mode
    self.randstep = randstep
    self.Datasets_params = Datasets_params
    self.reading_type = Datasets_params[0]['reading_type']
    self.num_objects = []
    datasets = []
    X_train, y_train = [], []
    X_val, y_val = [], []
    X_test, y_test = [], []
    for DP in Datasets_params:
        X, Y = [], []
        self.root = DP['root']
        if DP['reading_type'] in ['SVOS', 'SVOS-YTB']:
            self.years = DP['year']
            if DP['mode'] in ['test', '16val', '17val', 'YTB18']:
                Set = '/val.txt'
            elif DP['mode'] in ['16all']:
                Set = '/trainval.txt'
            elif DP['mode'] in ['test_dev', '17test_dev']:
                Set = '/test-dev.txt'
            with open(self.root + 'ImageSets/' + self.years + Set) as f:
                SetsTxts = f.readlines()
            print("Reading folders ", SetsTxts)
            # if DP['mode'] in ['all', 'online_all']:
            #     with open(self.root + 'ImageSets/' + self.years + '/val.txt') as f:
            #         SetsTxts2 = f.readlines()
            #     SetsTxts = SetsTxts + SetsTxts2
            Dirs = [self.root + 'JPEGImages/480p/' + name[0:-1] for name in SetsTxts]
            Dirs.sort()
            for dir in Dirs:
                print("scanning DIR ", dir)
                files = glob(dir + '/*.*')
                files.sort()
                if self.iter_mode == 'test':
                    X.append(files)
                    if DP['tar_mode'] == 'find':
                        Y_files = glob(dir.replace('JPEGImages', 'Annotations') + '/*.*')
                        if len(Y_files) == 0:
                            print(dir + ' not found')
                    else:
                        Y_files = [
                            f.replace('.jpg', '.png').replace('JPEGImages', 'Annotations').replace('.bmp', '.png')
                            for f in files
                        ]
                    Y_files.sort()
                    Y.append(Y_files)
                else:
                    # The original `assert ('error')` always passed (a non-empty
                    # string is truthy); fail explicitly instead.
                    raise AssertionError('error: unsupported iter_mode')
                if DP['reading_type'] != 'SVOS-YTB':
                    _mask = np.array(Image.open(Y_files[0]).convert("P"))
                    self.num_objects.append(np.max(_mask))
        if DP['mode'] == 'train':
            X_train = X
            y_train = Y
        elif DP['mode'] in ['test', 'all', 'test_dev', '17test_dev', '16val', '17val', '16all', 'YTB18']:
            X_test = X
            y_test = Y
        datasets.append(dict(X_train=[X_train], y_train=[y_train],
                             X_valid=[X_val], y_valid=[y_val],
                             X_test=[X_test], y_test=[y_test]))
    self.image_filenames = Data_combinePicNameList(datasets)
    self.transform = transform
    self.input_transform = input_transform
    self.target_transform = target_transform
    self.centerCrop = CenterCrop((480, 864))
    self.random_crop = RandomCrop((512, 960))
    self.rand = rand
    self.in_type = in_type
    self.idx_0 = 0
plot_data.append(samples[i].cpu()) all_dists = torch.min(sample_cdist[i], flip_sample_cdist[i]) indices = torch.topk(-all_dists, k=k)[1] for ind in indices: plot_data.append(data[ind]) plot_data = torch.stack(plot_data, dim=0) save_image(plot_data, '{}.png'.format(name), nrow=k + 1) if __name__ == '__main__': args = parser.parse_args() if args.dataset == 'church': transforms = Compose([ Resize(96), CenterCrop(96), ToTensor() ]) dataset = LSUN('exp/datasets/lsun', ['church_outdoor_train'], transform=transforms) elif args.dataset == 'tower' or args.dataset == 'bedroom': transforms = Compose([ Resize(128), CenterCrop(128), ToTensor() ]) dataset = LSUN('exp/datasets/lsun', ['{}_train'.format(args.dataset)], transform=transforms) elif args.dataset == 'celeba': transforms = Compose([ CenterCrop(140),
def main(args):
    normalize = Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    transform = Compose([Resize(256), CenterCrop(224), ToTensor(), normalize])
    dataset = ImageDataset(args.image_folder, transform=transform, return_paths=True)
    # n_images = len(dataset)
    dataloader = DataLoader(dataset, shuffle=False, batch_size=args.batch_size,
                            pin_memory=True, num_workers=0)
    model = models.resnet50(pretrained=True).to(args.device)
    model.eval()
    config = tf.ConfigProto(intra_op_parallelism_threads=1,
                            inter_op_parallelism_threads=1,
                            allow_soft_placement=True,
                            device_count={'CPU': 1})
    sess = tf.Session(config=config)
    x_op = tf.placeholder(tf.float32, shape=(None, 3, 224, 224,))
    tf_model = convert_pytorch_model_to_tf(model, args.device)
    cleverhans_model = CallableModelWrapper(tf_model, output_layer='logits')

    # Compute clip_min and clip_max using a full black and a full white image.
    clip_min = normalize(torch.zeros(3, 1, 1)).min().item()
    clip_max = normalize(torch.ones(3, 1, 1)).max().item()

    eps = args.eps / 255.
    eps_iter = 20
    nb_iter = 10
    args.ord = np.inf if args.ord < 0 else args.ord
    grad_params = {'eps': eps, 'ord': args.ord}
    common_params = {'clip_min': clip_min, 'clip_max': clip_max}
    iter_params = {'eps_iter': eps_iter / 255., 'nb_iter': nb_iter}

    attack_name = ''
    if args.attack == 'fgsm':
        attack_name = '_L{}_eps{}'.format(args.ord, args.eps)
        attack_op = FastGradientMethod(cleverhans_model, sess=sess)
        attack_params = {**common_params, **grad_params}
    elif args.attack == 'iter':
        attack_name = '_L{}_eps{}_epsi{}_i{}'.format(args.ord, args.eps, eps_iter, nb_iter)
        attack_op = BasicIterativeMethod(cleverhans_model, sess=sess)
        attack_params = {**common_params, **grad_params, **iter_params}
    elif args.attack == 'm-iter':
        attack_name = '_L{}_eps{}_epsi{}_i{}'.format(args.ord, args.eps, eps_iter, nb_iter)
        attack_op = MomentumIterativeMethod(cleverhans_model, sess=sess)
        attack_params = {**common_params, **grad_params, **iter_params}
    elif args.attack == 'pgd':
        attack_name = '_L{}_eps{}_epsi{}_i{}'.format(args.ord, args.eps, eps_iter, nb_iter)
        attack_op = MadryEtAl(cleverhans_model, sess=sess)
        attack_params = {**common_params, **grad_params, **iter_params}
    elif args.attack == 'jsma':
        attack_op = SaliencyMapMethod(cleverhans_model, sess=sess)
        attack_params = {'theta': eps, 'symbolic_impl': False, **common_params}
    elif args.attack == 'deepfool':
        attack_op = DeepFool(cleverhans_model, sess=sess)
        attack_params = common_params
    elif args.attack == 'cw':
        attack_op = CarliniWagnerL2(cleverhans_model, sess=sess)
        attack_params = common_params
    elif args.attack == 'lbfgs':
        attack_op = LBFGS(cleverhans_model, sess=sess)
        target = np.zeros((1, 1000))
        target[0, np.random.randint(1000)] = 1
        y = tf.placeholder(tf.float32, target.shape)
        attack_params = {'y_target': y, **common_params}

    attack_name = args.attack + attack_name
    print('Running [{}]. Params: {}'.format(args.attack.upper(), attack_params))

    adv_x_op = attack_op.generate(x_op, **attack_params)
    adv_preds_op = tf_model(adv_x_op)
    preds_op = tf_model(x_op)

    n_success = 0
    n_processed = 0
    progress = tqdm(dataloader)
    for paths, x in progress:
        progress.set_description('ATTACK')
        z, adv_x, adv_z = sess.run([preds_op, adv_x_op, adv_preds_op],
                                   feed_dict={x_op: x, y: target})
        src, dst = np.argmax(z, axis=1), np.argmax(adv_z, axis=1)
        success = src != dst
        success_paths = np.array(paths)[success]
        success_adv_x = adv_x[success]
        success_src = src[success]
        success_dst = dst[success]
        n_success += success_adv_x.shape[0]
        n_processed += x.shape[0]
        progress.set_postfix({'Success': '{:3.2%}'.format(n_success / n_processed)})
        progress.set_description('SAVING')
        for p, a, s, d in zip(success_paths, success_adv_x, success_src, success_dst):
            path = '{}_{}_src{}_dst{}.npz'.format(p, attack_name, s, d)
            path = os.path.join(args.out_folder, path)
            np.savez_compressed(path, img=a)
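# Worked example of the clip_min/clip_max trick above with the ImageNet
# statistics used: normalizing an all-black and an all-white image gives the
# extremes of the valid normalized range, so the attack stays inside it.
#   clip_min = min over channels of (0 - mean) / std = (0 - 0.485) / 0.229 ≈ -2.118
#   clip_max = max over channels of (1 - mean) / std = (1 - 0.406) / 0.225 ≈  2.640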
from MA.transform import ToLabel, Relabel from MA.dataset import MA, eval_ds # from basic_net.dataset import dt_ma torch.cuda.set_device(0) NUM_CHANNELS = 3 NUM_CLASSES = 2 color_transform = Colorize() image_transform = ToPILImage() input_transform = Compose([ Scale(256), CenterCrop(256), ToTensor(), Normalize([.485, .456, .406], [.229, .224, .225]), ]) eval_input_transform = Compose([ Scale(256), ToTensor(), Normalize([.485, .456, .406], [.229, .224, .225]), ]) target_transform = Compose([ Scale(256), CenterCrop(256), ToLabel(), Relabel(255, 1),
help="Location of mapping file for gestures to commands") args = parser.parse_args() parser.print_help() # sys.exit(1) print('Using %s for inference' % ('GPU' if args.use_gpu else 'CPU')) # initialise some variables verbose = args.verbose device = torch.device( "cuda" if args.use_gpu and torch.cuda.is_available() else "cpu") transform = Compose([ ToPILImage(), CenterCrop(84), ToTensor(), Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) ]) model = ConvColumn(num_classes) # read in configuration file for mapping of gestures to keyboard keys mapping = configparser.ConfigParser() action = {} if os.path.isfile(args.mapping): mapping.read(args.mapping) for m in mapping['MAPPING']: val = mapping['MAPPING'][m].split(',') action[m] = {
TRAIN_TRANSFORMS = [ RandomApply( [RandomAffine(degrees=45, translate=(0.1, 0.1), scale=(0.7, 1.2), resample=2), ], p=0.5 ), RandomCrop(size=350), RandomHorizontalFlip(p=0.5), RandomVerticalFlip(p=0.5), ColorJitter(hue=0.1, brightness=0.1), ToTensor(), Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) ] VAL_TRANSFORMS = [ CenterCrop(size=350), RandomHorizontalFlip(p=0.5), RandomVerticalFlip(p=0.5), ToTensor(), Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) ] BATCH_SIZE = 32 NUM_WORKERS = 8 TRAIN_LOADER, VAL_LOADER = get_data_loaders( train_dataset_path=DATASET_PATH / "train_400x400", val_dataset_path=DATASET_PATH / "val_400x400", train_data_transform=TRAIN_TRANSFORMS, val_data_transform=VAL_TRANSFORMS,
def init_dataloaders(src_path, tgt_path, src_num, tgt_num, sample_ratio, resize_dim, batch_size, shuffle, crop_size=224, filter_num_cls=50): if not "domainnet" in src_path and not "domainnet" in tgt_path: transforms = Compose([ ToPILImage(), Resize(resize_dim), ToTensor(), Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)) ]) train_transforms = transforms test_transforms = transforms else: train_transforms = Compose([ ToPILImage(), RandomHorizontalFlip(), Resize(resize_dim), RandomCrop(resize_dim), ToTensor(), Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) ]) test_transforms = Compose([ ToPILImage(), Resize(resize_dim), CenterCrop(resize_dim), ToTensor(), Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) ]) train_dataset = PairDataset(src_path, tgt_path, src_num, tgt_num, sample_ratio, transform=train_transforms, filter_num_cls=filter_num_cls) train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=8) valid_dataset = SingleDataset(tgt_path, "te", transform=test_transforms, filter_num_cls=filter_num_cls) valid_dataloader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False, num_workers=4) test_dataset = SingleDataset(tgt_path, "te", transform=test_transforms, filter_num_cls=filter_num_cls) test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=4) return train_dataloader, valid_dataloader, test_dataloader
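# Hedged usage of init_dataloaders(); the paths and counts are illustrative,
# and the on-disk format expected by PairDataset/SingleDataset is not shown here.
train_dl, valid_dl, test_dl = init_dataloaders(
    src_path="data/source", tgt_path="data/target",  # illustrative paths
    src_num=1000, tgt_num=1000, sample_ratio=1.0,
    resize_dim=224, batch_size=32, shuffle=True,
)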
def input_transform(crop_size): return Compose([ CenterCrop(crop_size), ToTensor() ])
Conv2d(config.DISCRIMINATOR_FEATURES_NUM * 8, 1, kernel_size=4, stride=1, padding=0, bias=False), Sigmoid()) def forward(self, input): return self.mainNetwork(input).view(-1) if PHRASE == "TRAIN": transforms = Compose([ Resize(config.IMAGE_SIZE), CenterCrop(config.IMAGE_SIZE), ToTensor(), Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)) ]) dataset = ImageFolder(config.GAN_DATA_PATH, transform=transforms) dataLoader = DataLoader(dataset=dataset, batch_size=config.BATCH_SIZE, shuffle=True, num_workers=config.NUM_WORKERS_LOAD_IMAGE, drop_last=True) netG, netD = DataParallel(GeneratorNet()), DataParallel(DiscriminatorNet()) map_location = lambda storage, loc: storage optimizer_generator = Adam(netG.parameters(), config.LR_GENERATOR, betas=(config.BETA1, 0.999))
def __init__(self, opt, val=False): super(CustomImageNet1K, self).__init__() dir_dataset = os.path.join(opt.path_ImageNet, "Val" if val else "Train") #list_dir = sorted(glob(os.path.join(dir_dataset, '*'))) #self.list_input = [] #sorted(glob(os.path.join(dir_dataset, 'val' if val else 'train', '*'))) #.JPEG'))) #for dir in list_dir: # self.list_input.extend(glob(os.path.join(dir_dataset, dir, "*.JPEG"))) self.list_input = sorted(glob(os.path.join(dir_dataset, "*.JPEG"))) assert len(self.list_input) > 0, "Please check the path of dataset. Current path is set as {}".format(dir_dataset) if val: # path_label = "/mnt/home/gishin/training_WNID2class.txt" path_label = opt.path_label_val dict_WNID2label = dict() # with open(path_label, 'r') as txt_file: # csv_file = reader(txt_file, delimiter=',') # print(csv_file) # for i, row in enumerate(csv_file): # if i != 0: # if int(row[1]) - 1 == 1000: # break # dict_WNID2label.update({row[0]: int(row[1]) - 1}) # -1 is for making the label start from 0. # else: # pass # self.label = dict_WNID2label # print(len(self.list_input)) # path_label = os.path.join("/mnt/home/gishin/ILSVRC2012_validation_ground_truth.txt") label = list() with open(path_label, 'r') as txt_file: for i, row in enumerate(txt_file): dict_WNID2label.update({i: int(row) - 1}) # label.append(int(row) - 1) self.label = dict_WNID2label self.transform = Compose([Resize(256), CenterCrop(224), ToTensor(), Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])]) else: # path_label = "/mnt/home/gishin/training_WNID2class.txt" path_label = opt.path_label_train dict_WNID2label = dict() with open(path_label, 'r') as txt_file: csv_file = reader(txt_file, delimiter=',') for i, row in enumerate(csv_file): if i != 0: if int(row[1]) - 1 == 1000: break dict_WNID2label.update({row[0]: int(row[1]) - 1}) # -1 is for making the label start from 0. else: pass self.label = dict_WNID2label self.transform = Compose([RandomResizedCrop(224), RandomHorizontalFlip(), ToTensor(), Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])]) self.val = val
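# Hedged sketch of the accessors such a Dataset would pair with the __init__
# above; the originals are not shown here, so everything below is an assumption.
# For the training split, the WNID is taken from the filename prefix (ImageNet
# train files are named like "n01440764_10026.JPEG"); `Image` comes from PIL.
def __getitem__(self, index):
    image = Image.open(self.list_input[index]).convert("RGB")
    if self.val:
        label = self.label[index]  # line i of the ground-truth file -> class id
    else:
        wnid = os.path.basename(self.list_input[index]).split("_")[0]
        label = self.label[wnid]   # WNID -> class id
    return self.transform(image), label

def __len__(self):
    return len(self.list_input)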
def load(name: str, device: Union[str, torch.device] = "cuda" if torch.cuda.is_available() else "cpu"): if name not in _MODELS: raise RuntimeError( f"Model {name} not found; available models = {available_models()}") model_path = _download(_MODELS[name]) model = torch.jit.load(model_path, map_location=device).eval() n_px = model.input_resolution.item() # patch the device names device_holder = torch.jit.trace( lambda: torch.ones([]).to(torch.device(device)), example_inputs=[]) device_node = [ n for n in device_holder.graph.findAllNodes("prim::Constant") if "Device" in repr(n) ][-1] def patch_device(module): graphs = [module.graph] if hasattr(module, "graph") else [] if hasattr(module, "forward1"): graphs.append(module.forward1.graph) for graph in graphs: for node in graph.findAllNodes("prim::Constant"): if "value" in node.attributeNames() and str( node["value"]).startswith("cuda"): node.copyAttributes(device_node) model.apply(patch_device) patch_device(model.encode_image) patch_device(model.encode_text) # patch dtype to float32 on CPU if device == "cpu": float_holder = torch.jit.trace(lambda: torch.ones([]).float(), example_inputs=[]) float_input = list(float_holder.graph.findNode("aten::to").inputs())[1] float_node = float_input.node() def patch_float(module): graphs = [module.graph] if hasattr(module, "graph") else [] if hasattr(module, "forward1"): graphs.append(module.forward1.graph) for graph in graphs: for node in graph.findAllNodes("aten::to"): inputs = list(node.inputs()) for i in [ 1, 2 ]: # dtype can be the second or third argument to aten::to() if inputs[i].node()["value"] == 5: inputs[i].node().copyAttributes(float_node) model.apply(patch_float) patch_float(model.encode_image) patch_float(model.encode_text) model.float() transform = Compose([ Resize(n_px, interpolation=Image.BICUBIC), CenterCrop(n_px), lambda image: image.convert("RGB"), ToTensor(), Normalize((0.48145466, 0.4578275, 0.40821073), (0.26862954, 0.26130258, 0.27577711)), ]) return model, transform
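# Hedged usage of load(): the model name must be a key of _MODELS (assumed
# here), and the image path is illustrative. encode_image is the same method
# the generator_loss code earlier in this section feeds the transform into.
import torch
from PIL import Image

model, transform = load("ViT-B/32")                      # model name assumed
image = transform(Image.open("photo.jpg")).unsqueeze(0)  # illustrative path
with torch.no_grad():
    features = model.encode_image(image)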
def __init__(self, opts, blur_root, sharp_root, sharp_start_root, gt_root,
             gt_pos_root, test_mode=False):
    blur_datasets = np.load(blur_root)
    sharp_datasets = np.load(sharp_root)
    sharp_start_datasets = np.load(sharp_start_root)
    gt_datasets = np.load(gt_root)
    gt_pos_datasets = np.load(gt_pos_root)
    self.test_mode = test_mode
    category = blur_datasets.files[0]
    self.blur = blur_datasets[category]
    self.sharp = sharp_datasets[category]
    self.sharp_start = sharp_start_datasets[category]
    self.gt = gt_datasets[category]
    self.gt_pos = gt_pos_datasets[category]
    self.batch_size = self.blur.shape[0]
    self.train_set_num = int((1 - test_set_ratio) * self.batch_size)
    if not test_mode:
        self.blur = self.blur[:self.train_set_num]
        self.sharp = self.sharp[:self.train_set_num]
        self.sharp_start = self.sharp_start[:self.train_set_num]
        self.gt = self.gt[:self.train_set_num]
        self.gt_pos = self.gt_pos[:self.train_set_num]
    else:
        self.blur = self.blur[self.train_set_num:]
        self.sharp = self.sharp[self.train_set_num:]
        self.sharp_start = self.sharp_start[self.train_set_num:]
        self.gt = self.gt[self.train_set_num:]
        self.gt_pos = self.gt_pos[self.train_set_num:]
    # flatten
    self.blur = np.concatenate(self.blur, 0)
    self.sharp = np.concatenate(self.sharp, 0)
    self.sharp_start = np.concatenate(self.sharp_start, 0)
    # self.gt = self.gt[:, :, 3:, :]
    # self.gt = self.gt[:, :, ::4, :]  # [data_num, frame_num, ratio(16 -> 4), 2]
    # Average the gt over the ratio axis to collapse it to a single value:
    # self.gt = self.gt.mean(2, keepdims=True)
    print(self.gt.shape)
    self.gt = np.concatenate(self.gt, 0)
    self.gt = np.reshape(self.gt, [self.gt.shape[0], -1])
    self.gt = self.gt.astype(np.float32)
    # self.gt_pos = self.gt_pos[:, :, 15:, :]
    self.gt_pos = np.concatenate(self.gt_pos, 0)
    self.gt_pos = np.reshape(self.gt_pos, [self.gt_pos.shape[0], -1])
    self.gt_pos = self.gt_pos.astype(np.float32)
    self.dataset_size = len(self.blur)
    self.input_dim_A = opts.input_dim_a
    self.input_dim_B = opts.input_dim_b
    self.resize_x = opts.resize_size_x
    self.resize_y = opts.resize_size_y
    if opts.phase == 'train':
        transforms = [RandomCrop(opts.crop_size)]
    else:
        transforms = [CenterCrop(opts.crop_size)]
    # if not opts.no_flip:
    #     transforms.append(RandomHorizontalFlip())
    transforms.append(ToTensor())
    transforms.append(Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]))
    self.transforms = Compose(transforms)
    print('train A, B: %d images' % (self.dataset_size))
    return
def input_transform(crop_size, upscale_factor): return Compose([ CenterCrop(crop_size), Resize(crop_size // upscale_factor), ToTensor(), ])
batch_size = args.batch_size epoch = args.num_epoch save_path = 'model_save/' #normalize for ImageNet normalize = torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) crop = 200 rng = np.random.RandomState(args.random_seed) precrop = crop + 24 crop = rng.randint(crop, precrop) transformations = Compose([ Scale((256, 256)), Pad((24, 24, 24, 24)), CenterCrop(precrop), RandomCrop(crop), Scale((256, 256)), ToTensor(), normalize ]) #define a batch-wise l2 loss def criterion_l2(input_f, target_f): # return a per batch l2 loss res = (input_f - target_f) res = res * res return res.sum(dim=2) def criterion_l2_2(input_f, target_f):
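# Shape walkthrough for the pipeline above (pixels per side), with crop = 200,
# precrop = 224, and crop redrawn once per run uniformly from [200, 224):
#   Scale((256, 256))      -> 256
#   Pad((24, 24, 24, 24))  -> 304
#   CenterCrop(precrop)    -> 224
#   RandomCrop(crop)       -> crop (random location, randomly drawn size)
#   Scale((256, 256))      -> 256 (back to the network's input size)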
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
from torchvision.datasets.mnist import MNIST
from torchvision.transforms import Compose, Resize, ToTensor, CenterCrop
from torch.optim import Adam
from einops import rearrange
import opt_einsum as oe
from libcrap.torch import set_random_seeds

# Crop to 16x16, then shrink to 4x4. Note: Resize takes the size as a single
# argument (int or tuple); Resize(4, 4) would pass 4 as the interpolation mode.
MNIST_TRANSFORM = Compose((CenterCrop((16, 16)), Resize((4, 4)), ToTensor()))

train_size = 50000
batch_size = 512
device = torch.device("cuda:1")
lr = 1e-2
num_iters = 30000
mov_avg_coeff = 0.99
seed = 0
save_where = (
    "/mnt/important/experiments/tiny_mnist_probabilistic_multilinear_classifier_adam.pth"
)
set_random_seeds(device, seed)
print(f"{seed=}")
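# Hedged sketch of wiring MNIST_TRANSFORM into the loader classes imported
# above; the root directory "." is an assumption.
train_dataset = MNIST(".", train=True, download=True, transform=MNIST_TRANSFORM)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)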
def target_transform(crop_size): return Compose([ CenterCrop(crop_size), ToTensor(), Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) ])
def main(): args = parse_args() # Initialize the accelerator. We will let the accelerator handle device placement for us in this example. # If we're using tracking, we also need to initialize it here and it will pick up all supported trackers in the environment accelerator = Accelerator( log_with="all", logging_dir=args.output_dir) if args.with_tracking else Accelerator() logger.info(accelerator.state) # Make one log on every process with the configuration for debugging. logging.basicConfig( format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", datefmt="%m/%d/%Y %H:%M:%S", level=logging.INFO, ) logger.info(accelerator.state) # Setup logging, we only want one process per machine to log things on the screen. # accelerator.is_local_main_process is only True for one process per machine. logger.setLevel( logging.INFO if accelerator.is_local_main_process else logging.ERROR) if accelerator.is_local_main_process: datasets.utils.logging.set_verbosity_warning() transformers.utils.logging.set_verbosity_info() else: datasets.utils.logging.set_verbosity_error() transformers.utils.logging.set_verbosity_error() # If passed along, set the training seed now. if args.seed is not None: set_seed(args.seed) # Handle the repository creation if accelerator.is_main_process: if args.push_to_hub: if args.hub_model_id is None: repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token) else: repo_name = args.hub_model_id repo = Repository(args.output_dir, clone_from=repo_name) with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore: if "step_*" not in gitignore: gitignore.write("step_*\n") if "epoch_*" not in gitignore: gitignore.write("epoch_*\n") elif args.output_dir is not None: os.makedirs(args.output_dir, exist_ok=True) accelerator.wait_for_everyone() # Get the datasets: you can either provide your own training and evaluation files (see below) # or specify a Dataset from the hub (the dataset will be downloaded automatically from the datasets Hub). # In distributed training, the load_dataset function guarantees that only one local process can concurrently # download the dataset. if args.dataset_name is not None: # Downloading and loading a dataset from the hub. dataset = load_dataset(args.dataset_name, task="image-classification") else: data_files = {} if args.train_dir is not None: data_files["train"] = os.path.join(args.train_dir, "**") if args.validation_dir is not None: data_files["validation"] = os.path.join(args.validation_dir, "**") dataset = load_dataset( "imagefolder", data_files=data_files, cache_dir=args.cache_dir, task="image-classification", ) # See more about loading custom images at # https://huggingface.co/docs/datasets/v2.0.0/en/image_process#imagefolder. # If we don't have a validation split, split off a percentage of train as validation. args.train_val_split = None if "validation" in dataset.keys( ) else args.train_val_split if isinstance(args.train_val_split, float) and args.train_val_split > 0.0: split = dataset["train"].train_test_split(args.train_val_split) dataset["train"] = split["train"] dataset["validation"] = split["test"] # Prepare label mappings. # We'll include these in the model's config to get human readable labels in the Inference API. 
labels = dataset["train"].features["labels"].names label2id = {label: str(i) for i, label in enumerate(labels)} id2label = {str(i): label for i, label in enumerate(labels)} # Load pretrained model and feature extractor # # In distributed training, the .from_pretrained methods guarantee that only one local process can concurrently # download model & vocab. config = AutoConfig.from_pretrained( args.model_name_or_path, num_labels=len(labels), i2label=id2label, label2id=label2id, finetuning_task="image-classification", ) feature_extractor = AutoFeatureExtractor.from_pretrained( args.model_name_or_path) model = AutoModelForImageClassification.from_pretrained( args.model_name_or_path, from_tf=bool(".ckpt" in args.model_name_or_path), config=config, ) # Preprocessing the datasets # Define torchvision transforms to be applied to each image. normalize = Normalize(mean=feature_extractor.image_mean, std=feature_extractor.image_std) train_transforms = Compose([ RandomResizedCrop(feature_extractor.size), RandomHorizontalFlip(), ToTensor(), normalize, ]) val_transforms = Compose([ Resize(feature_extractor.size), CenterCrop(feature_extractor.size), ToTensor(), normalize, ]) def preprocess_train(example_batch): """Apply _train_transforms across a batch.""" example_batch["pixel_values"] = [ train_transforms(image.convert("RGB")) for image in example_batch["image"] ] return example_batch def preprocess_val(example_batch): """Apply _val_transforms across a batch.""" example_batch["pixel_values"] = [ val_transforms(image.convert("RGB")) for image in example_batch["image"] ] return example_batch with accelerator.main_process_first(): if args.max_train_samples is not None: dataset["train"] = dataset["train"].shuffle(seed=args.seed).select( range(args.max_train_samples)) # Set the training transforms train_dataset = dataset["train"].with_transform(preprocess_train) if args.max_eval_samples is not None: dataset["validation"] = dataset["validation"].shuffle( seed=args.seed).select(range(args.max_eval_samples)) # Set the validation transforms eval_dataset = dataset["validation"].with_transform(preprocess_val) # DataLoaders creation: def collate_fn(examples): pixel_values = torch.stack( [example["pixel_values"] for example in examples]) labels = torch.tensor([example["labels"] for example in examples]) return {"pixel_values": pixel_values, "labels": labels} train_dataloader = DataLoader(train_dataset, shuffle=True, collate_fn=collate_fn, batch_size=args.per_device_train_batch_size) eval_dataloader = DataLoader(eval_dataset, collate_fn=collate_fn, batch_size=args.per_device_eval_batch_size) # Optimizer # Split weights in two groups, one with weight decay and the other not. no_decay = ["bias", "LayerNorm.weight"] optimizer_grouped_parameters = [ { "params": [ p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay) ], "weight_decay": args.weight_decay, }, { "params": [ p for n, p in model.named_parameters() if any(nd in n for nd in no_decay) ], "weight_decay": 0.0, }, ] optimizer = torch.optim.AdamW(optimizer_grouped_parameters, lr=args.learning_rate) # Scheduler and math around the number of training steps. 
num_update_steps_per_epoch = math.ceil( len(train_dataloader) / args.gradient_accumulation_steps) if args.max_train_steps is None: args.max_train_steps = args.num_train_epochs * num_update_steps_per_epoch else: args.num_train_epochs = math.ceil(args.max_train_steps / num_update_steps_per_epoch) lr_scheduler = get_scheduler( name=args.lr_scheduler_type, optimizer=optimizer, num_warmup_steps=args.num_warmup_steps, num_training_steps=args.max_train_steps, ) # Prepare everything with our `accelerator`. model, optimizer, train_dataloader, eval_dataloader, lr_scheduler = accelerator.prepare( model, optimizer, train_dataloader, eval_dataloader, lr_scheduler) # Figure out how many steps we should save the Accelerator states if hasattr(args.checkpointing_steps, "isdigit"): checkpointing_steps = args.checkpointing_steps if args.checkpointing_steps.isdigit(): checkpointing_steps = int(args.checkpointing_steps) else: checkpointing_steps = None # We need to initialize the trackers we use, and also store our configuration if args.with_tracking: experiment_config = vars(args) # TensorBoard cannot log Enums, need the raw value experiment_config["lr_scheduler_type"] = experiment_config[ "lr_scheduler_type"].value accelerator.init_trackers("image_classification_no_trainer", experiment_config) # Get the metric function metric = load_metric("accuracy") # Train! total_batch_size = args.per_device_train_batch_size * accelerator.num_processes * args.gradient_accumulation_steps logger.info("***** Running training *****") logger.info(f" Num examples = {len(train_dataset)}") logger.info(f" Num Epochs = {args.num_train_epochs}") logger.info( f" Instantaneous batch size per device = {args.per_device_train_batch_size}" ) logger.info( f" Total train batch size (w. parallel, distributed & accumulation) = {total_batch_size}" ) logger.info( f" Gradient Accumulation steps = {args.gradient_accumulation_steps}") logger.info(f" Total optimization steps = {args.max_train_steps}") # Only show the progress bar once on each machine. 
progress_bar = tqdm(range(args.max_train_steps), disable=not accelerator.is_local_main_process) completed_steps = 0 # Potentially load in the weights and states from a previous save if args.resume_from_checkpoint: if args.resume_from_checkpoint is not None or args.resume_from_checkpoint != "": accelerator.print( f"Resumed from checkpoint: {args.resume_from_checkpoint}") accelerator.load_state(args.resume_from_checkpoint) resume_step = None path = args.resume_from_checkpoint else: # Get the most recent checkpoint dirs = [f.name for f in os.scandir(os.getcwd()) if f.is_dir()] dirs.sort(key=os.path.getctime) path = dirs[ -1] # Sorts folders by date modified, most recent checkpoint is the last if "epoch" in path: args.num_train_epochs -= int(path.replace("epoch_", "")) else: resume_step = int(path.replace("step_", "")) args.num_train_epochs -= resume_step // len(train_dataloader) resume_step = (args.num_train_epochs * len(train_dataloader)) - resume_step for epoch in range(args.num_train_epochs): model.train() if args.with_tracking: total_loss = 0 for step, batch in enumerate(train_dataloader): # We need to skip steps until we reach the resumed step if args.resume_from_checkpoint and epoch == 0 and step < resume_step: continue outputs = model(**batch) loss = outputs.loss # We keep track of the loss at each epoch if args.with_tracking: total_loss += loss.detach().float() loss = loss / args.gradient_accumulation_steps accelerator.backward(loss) if step % args.gradient_accumulation_steps == 0 or step == len( train_dataloader) - 1: optimizer.step() lr_scheduler.step() optimizer.zero_grad() progress_bar.update(1) completed_steps += 1 if isinstance(checkpointing_steps, int): if completed_steps % checkpointing_steps == 0: output_dir = f"step_{completed_steps}" if args.output_dir is not None: output_dir = os.path.join(args.output_dir, output_dir) accelerator.save_state(output_dir) if args.push_to_hub and epoch < args.num_train_epochs - 1: accelerator.wait_for_everyone() unwrapped_model = accelerator.unwrap_model(model) unwrapped_model.save_pretrained( args.output_dir, save_function=accelerator.save) if accelerator.is_main_process: feature_extractor.save_pretrained(args.output_dir) repo.push_to_hub( commit_message= f"Training in progress {completed_steps} steps", blocking=False, auto_lfs_prune=True, ) if completed_steps >= args.max_train_steps: break model.eval() samples_seen = 0 for step, batch in enumerate(eval_dataloader): outputs = model(**batch) predictions = outputs.logits.argmax(dim=-1) predictions, references = accelerator.gather( (predictions, batch["labels"])) # If we are in a multiprocess environment, the last batch has duplicates if accelerator.num_processes > 1: if step == len(eval_dataloader): predictions = predictions[:len(eval_dataloader.dataset) - samples_seen] references = references[:len(eval_dataloader.dataset) - samples_seen] else: samples_seen += references.shape[0] metric.add_batch( predictions=predictions, references=references, ) eval_metric = metric.compute() logger.info(f"epoch {epoch}: {eval_metric}") if args.with_tracking: accelerator.log( { "accuracy": eval_metric, "train_loss": total_loss, "epoch": epoch, "step": completed_steps, }, ) if args.push_to_hub and epoch < args.num_train_epochs - 1: accelerator.wait_for_everyone() unwrapped_model = accelerator.unwrap_model(model) unwrapped_model.save_pretrained(args.output_dir, save_function=accelerator.save) if accelerator.is_main_process: feature_extractor.save_pretrained(args.output_dir) repo.push_to_hub( 
commit_message=f"Training in progress epoch {epoch}", blocking=False, auto_lfs_prune=True) if args.checkpointing_steps == "epoch": output_dir = f"epoch_{epoch}" if args.output_dir is not None: output_dir = os.path.join(args.output_dir, output_dir) accelerator.save_state(output_dir) if args.output_dir is not None: accelerator.wait_for_everyone() unwrapped_model = accelerator.unwrap_model(model) unwrapped_model.save_pretrained(args.output_dir, save_function=accelerator.save) if accelerator.is_main_process: feature_extractor.save_pretrained(args.output_dir) if args.push_to_hub: repo.push_to_hub(commit_message="End of training", auto_lfs_prune=True) if args.output_dir is not None: with open(os.path.join(args.output_dir, "all_results.json"), "w") as f: json.dump({"eval_accuracy": eval_metric["accuracy"]}, f)
from resnet.resnet_single_scale import * import importlib #import evalIoU from iouEval import iouEval, getColorEntry from shutil import copyfile NUM_CHANNELS = 3 NUM_CLASSES = 28 color_transform = Colorize(NUM_CLASSES) image_transform = ToPILImage() input_transform = Compose([ CenterCrop(240), ToTensor(), Normalize([.485, .456, .406], [.229, .224, .225]), ]) target_transform = Compose([ CenterCrop(240), ToLabel(), Relabel(255, 27), ]) #Augmentations - different function implemented to perform random augments on both image and target class MyCoTransform(object): def __init__(self, enc, augment=True, height=512): self.enc = enc self.augment = augment
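# Hedged sketch of the joint __call__ such a co-transform typically implements:
# the same random decision is applied to both image and target so the
# segmentation labels stay aligned. The original body is not shown here, so
# this method (and its use of the module-level pipelines above) is an assumption.
import random
from torchvision.transforms import functional as TF

def __call__(self, input, target):
    if self.augment and random.random() < 0.5:
        input = TF.hflip(input)    # flip image and label together
        target = TF.hflip(target)
    return input_transform(input), target_transform(target)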