def __getitem__(self, index):
    img_name = self.data['full_path'][index]
    image = io.imread(img_name)
    image = self.transform(fromarray(image))

    target = self.labels[index]
    target = np.array([target])
    target = target.astype('float32').reshape(len(target), -1)
    target = torch.from_numpy(target)

    return image, target


image_size = (128, 128)

transform_train = Compose([
    Resize(image_size),
    Grayscale(),
    RandomHorizontalFlip(),
    RandomAffine(degrees=20, shear=(-0.2, 0.2, -0.2, 0.2), scale=(0.8, 1.2)),
    ToTensor()
])
transform_test = Compose([Resize(image_size), Grayscale(), ToTensor()])

xray_data_train, xray_data_test = train_test_split(xray_data, test_size=0.2, shuffle=False)
dataset_train = XRayDatasetFromCSV(xray_data_train, transform_train)
dataset_test = XRayDatasetFromCSV(xray_data_test, transform_test)
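# --- Hedged usage sketch (not part of the original): one plausible way to wrap the two
# datasets above in DataLoaders; the batch size and worker count are illustrative assumptions.
from torch.utils.data import DataLoader

train_loader = DataLoader(dataset_train, batch_size=32, shuffle=True, num_workers=4)
test_loader = DataLoader(dataset_test, batch_size=32, shuffle=False, num_workers=4)

# With the Resize((128, 128)) / Grayscale() pipeline above, a batch yields images of shape
# (B, 1, 128, 128) and float targets of shape (B, 1, 1).
images, targets = next(iter(train_loader))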
Build the generator and discriminator networks
'''
Net_G = Generator()
Net_D = Discriminator()

Net_G = DataParallel(Net_G)
Net_D = DataParallel(Net_D)
if GPU_NUMS > 1:
    Net_D.cuda()
    Net_G.cuda()

G_optimizer = Adam(Net_G.parameters(), lr=LR, betas=BETAS)
D_optimizer = Adam(Net_D.parameters(), lr=LR, betas=BETAS)

'''
Data loading and preprocessing
'''
transforms = Compose([
    Resize(IMAGE_SIZE),
    CenterCrop(IMAGE_SIZE),
    ToTensor(),
    Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])
dataset = ImageFolder(root='../ganData/face_gender/', transform=transforms)
train_loader = torch.utils.data.DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)


def one_hot(target):
    y = torch.zeros(target.size()[0], 10)
    for i in range(target.size()[0]):
def __init__(self, image_paths, true_bboxes, playout_episode=False, premasking=True,
             mode='train', max_steps_per_image=200, seed=None,
             bbox_scaling_w=0.05, bbox_scaling_h=0.1, bbox_transformer='base',
             has_termination_action=True, has_intermediate_reward=False,
             ior_marker_type='cross', history_length=10,
             assessor_model=None, train_assessor=False,
             grayscale=False, use_cut_area=False):
    """
    :param image_paths: The paths to the individual images
    :param true_bboxes: The true bounding boxes for each image
    :type image_paths: String or list
    :type true_bboxes: numpy.ndarray
    """
    # Determines whether the agent is training or testing.
    # Optimizations can be applied during training that are not allowed for testing.
    self.mode = mode
    # Factors for scaling all bounding boxes relative to their size
    self.bbox_scaling_w = bbox_scaling_w
    self.bbox_scaling_h = bbox_scaling_h
    # Whether IoR markers will be placed upfront after loading the image
    self.premasking = premasking
    # Whether an episode terminates after a single trigger or is played out until the end
    self.playout_episode = playout_episode
    # Episodes will be terminated automatically after reaching max steps
    self.max_steps_per_image = max_steps_per_image
    # Whether a termination action should be provided in the action set
    self.has_termination_action = has_termination_action
    # Whether a reward will be given for each non-trigger action based on the best gt IoU
    self.has_intermediate_reward = has_intermediate_reward
    # The type of IoR marker to be used when masking trigger regions
    self.ior_marker_type = ior_marker_type
    # Length of history in state & agent model
    self.history_length = history_length
    # Whether to return grayscale, 1-channel environment images
    self.grayscale = grayscale
    # Use tightness-aware IoU for reward (incorporating cut gt)
    self.use_cut_area = use_cut_area

    # Initialize action space
    self.bbox_transformer = create_bbox_transformer(bbox_transformer)
    self.action_space = spaces.Discrete(len(self.action_set))

    if self.grayscale:
        # 450*450*1 (grayscale image) + history_length * len(action_set) (one-hot-encoded history)
        self.observation_space = spaces.Tuple([
            spaces.Box(low=0, high=256, shape=(450, 450, 1)),
            spaces.Box(low=0, high=1, shape=(self.history_length, len(self.action_set)))
        ])
    else:
        # 450*450*3 (RGB image) + history_length * len(action_set) (one-hot-encoded history)
        self.observation_space = spaces.Tuple([
            spaces.Box(low=0, high=256, shape=(450, 450, 3)),
            spaces.Box(low=0, high=1, shape=(self.history_length, len(self.action_set)))
        ])

    # Initialize dataset
    if type(image_paths) is not list:
        image_paths = [image_paths]
    self.image_paths = image_paths
    self.true_bboxes = [[TextLocEnv.to_standard_box(b) for b in bboxes]
                        for bboxes in true_bboxes]

    # For registering a handler that will be executed once after a step
    self.post_step_handler = None

    # Episode-specific state
    # Image for the current episode
    self.episode_image = None
    self.current_image_index = 0
    # Ground truth bounding boxes for the current episode image
    self.episode_true_bboxes = None
    # Predicted bounding boxes for the current episode image
    self.episode_pred_bboxes = None
    # IoU values for each trigger in the current episode
    self.episode_trigger_ious = None
    # List of indices of masked bounding boxes for the current episode image
    self.episode_masked_indices = []
    # Number of trigger actions used so far
    self.num_triggers_used = 0
    # Number of episodes rolled out so far
    self.episode_count = 0
    # ID of last action taken
    self.last_action_taken = -1

    # For rendering
    self.viewer = None

    # Assessor (weak supervision)
    self.assessor = assessor_model
    self.train_assessor = train_assessor

    self.resize = Resize((450, 450), interpolation=InterpolationMode.NEAREST)

    self.seed(seed=seed)
    self.reset()
import torch
import torchvision
from torchvision.transforms import Normalize, ToTensor, Resize

from main import dataset
from fashion_model import FashionModel
from sklearn.preprocessing import MultiLabelBinarizer

torch.manual_seed(42)

if __name__ == "__main__":
    test_folder = '/media/spike/Scoob/materialist_fashion_test/'
    cvs_filename = input("[?] Output csv name: ")  # '001'
    use_cuda = torch.cuda.is_available()
    image_size = 224
    scale = Resize((image_size, image_size))
    normalize = Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    tforms = torchvision.transforms.Compose([scale, ToTensor(), normalize])
    batch = 32
    total_images = 39706
    mf_test_set = dataset.TestMaterialistFashion(test_folder, total_images, tforms)
    mf_test_loader = torch.utils.data.DataLoader(mf_test_set, batch_size=batch,
                                                 shuffle=False, num_workers=8)
    print("Size of test loader: {}".format(len(mf_test_loader)))
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model = FashionModel()
    model_state = torch.load('tmp/model_state_best.th')
def input_transform(crop_size, upscale_factor):
    return Compose([
        CenterCrop(crop_size),
        Resize(crop_size // upscale_factor),
        ToTensor(),
    ])
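# --- Hedged usage sketch (assumption, not from the source): applying input_transform to a
# PIL image; the crop size, upscale factor, and file path are illustrative.
from PIL import Image

lr_transform = input_transform(crop_size=256, upscale_factor=4)
lr_tensor = lr_transform(Image.open('example.jpg'))  # hypothetical image path
# lr_tensor is the low-resolution input: a 256x256 center crop downscaled to 64x64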
def initialize(self):
    # We need to initialize the model inside self.run and not self.__init__
    # to ensure that the model loads in the correct thread.
    config_path = 'expt/nytimes/9_transformer_objects/config.yaml'
    logger.info(f'Loading config from {config_path}')
    config = yaml_to_params(config_path, overrides='')
    prepare_environment(config)
    vocab = Vocabulary.from_params(config.pop('vocabulary'))
    model = Model.from_params(vocab=vocab, params=config.pop('model'))
    model = model.eval()

    model_path = 'expt/nytimes/9_transformer_objects/serialization/best.th'
    logger.info(f'Loading best model from {model_path}')
    best_model_state = torch.load(model_path, map_location=torch.device('cpu'))
    model.load_state_dict(best_model_state)
    self.model = model.to(self.device)

    logger.info('Loading roberta model.')
    roberta = torch.hub.load('pytorch/fairseq:2f7e3f3323', 'roberta.base')
    self.bpe = roberta.bpe
    self.indices = roberta.task.source_dictionary.indices

    logger.info('Loading face detection model.')
    self.mtcnn = MTCNN(keep_all=True, device=self.device)
    self.inception = InceptionResnetV1(pretrained='vggface2').eval()

    self.resnet = resnet152()
    self.resnet = self.resnet.to(self.device).eval()

    cfg = 'tell/yolov3/cfg/yolov3-spp.cfg'
    weight_path = 'data/yolov3-spp-ultralytics.pt'
    self.darknet = Darknet(cfg, img_size=416)
    attempt_download(weight_path)
    self.darknet.load_state_dict(
        torch.load(weight_path, map_location=self.device)['model'])
    self.darknet.to(self.device).eval()

    # Get names and colors
    self.names = load_classes('tell/yolov3/data/coco.names')
    random.seed(123)
    self.colors = [[random.randint(0, 255) for _ in range(3)]
                   for _ in range(len(self.names))]

    self.preprocess = Compose([
        Resize(256),
        CenterCrop(224),
        ToTensor(),
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    data_iterator = BasicIterator(batch_size=4)
    data_iterator.index_with(model.vocab)
    self.data_iterator = data_iterator

    self.tokenizer = Tokenizer.from_params(
        config.get('dataset_reader').get('tokenizer'))

    indexer_params = config.get('dataset_reader').get('token_indexers')
    self.token_indexers = {
        k: TokenIndexer.from_params(p) for k, p in indexer_params.items()
    }
""" Check the time for processing images only """ # Dataset from utils.datasets import DeepFashionDataset from torchvision.transforms import Compose from torchvision.transforms import Resize from torchvision.transforms import ToTensor from torchvision.transforms import Normalize from config.deep_fashion import DeepFashionConfig as cfg from torch.utils.data import DataLoader from utils.datasets import Siamesize from time import time trans = Compose([ Resize(cfg.sizes), ToTensor(), Normalize(cfg.mean, cfg.std), ]) # dataset train_ds = DeepFashionDataset(cfg.root_dir, 'train', transform=trans) siamese_train_ds = Siamesize(train_ds) loader_kwargs = { 'pin_memory': True, 'batch_size': 100, 'num_workers': 16, } s_train_loader = DataLoader(siamese_train_ds, **loader_kwargs) device = "cuda" for _ in range(1):
plt.plot(trainErrsTotal, '-', label="train total", color=(0.5, 0, 0.8))
# plt.plot(testErrsTotal, '-', label="test total", color=(0.5, 0.8, 0))
plt.yscale('log')
plt.grid(True)
plt.legend()
plt.savefig("./errors")

normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
img_size = (100, 100)
composed = Compose([
    ToPILImage(),
    Resize(img_size),
    RandomHorizontalFlip(),
    RandomGrayscale(p=0.5),
    RandomRotation(degrees=30, center=None),
    ToTensor(),
    normalize
])

train_dataset = HumpbackWhaleDataset(csv_file='./train_no_nu_whales.csv',
                                     root_dir="./train", transform=composed)
# test_dataset = TitanicDataset(csvFile='test.csv')
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=50,
                                           shuffle=True, num_workers=4)
def imagenet():
    normalize = Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]
    )
    return Compose([Resize(size=(224, 224)), ToTensor(), normalize])
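# --- Hedged usage sketch (assumption): preprocessing one PIL image with the imagenet()
# pipeline above and adding a batch dimension for a forward pass; the path is illustrative.
from PIL import Image

preprocess = imagenet()
x = preprocess(Image.open('example.jpg').convert('RGB'))  # (3, 224, 224)
batch = x.unsqueeze(0)                                    # (1, 3, 224, 224)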
def __init__(self, mode, roidb_file=VG_SGG_FN, dict_file=VG_SGG_DICT_FN,
             image_file=IM_DATA_FN, filter_empty_rels=True, num_im=-1, num_val_im=5000,
             filter_duplicate_rels=True, filter_non_overlap=True, use_proposals=False):
    """
    Torch dataset for VisualGenome
    :param mode: Must be train, test, or val
    :param roidb_file: HDF5 containing the GT boxes, classes, and relationships
    :param dict_file: JSON containing the mapping of classes/relationships to words
    :param image_file: HDF5 containing image filenames
    :param filter_empty_rels: True if we filter out images without relationships between
        boxes. One might want to set this to False if training a detector.
    :param filter_duplicate_rels: Whenever we see a duplicate relationship we'll sample instead
    :param num_im: Number of images in the entire dataset. -1 for all images.
    :param num_val_im: Number of images in the validation set (must be less than num_im
        unless num_im is -1.)
    :param use_proposals: If False, we don't provide proposals. Otherwise RPN proposals
        are loaded from PROPOSAL_FN.
    """
    if mode not in ('test', 'train', 'val'):
        raise ValueError(
            "Mode must be in test, train, or val. Supplied {}".format(mode))
    self.mode = mode

    # Initialize
    self.roidb_file = roidb_file
    self.dict_file = dict_file
    self.image_file = image_file
    self.filter_non_overlap = filter_non_overlap
    self.filter_duplicate_rels = filter_duplicate_rels and self.mode == 'train'

    self.split_mask, self.gt_boxes, self.gt_classes, self.relationships = load_graphs(
        self.roidb_file, self.mode, num_im, num_val_im=num_val_im,
        filter_empty_rels=filter_empty_rels,
        filter_non_overlap=self.filter_non_overlap and self.is_train,
    )

    self.filenames = load_image_filenames(image_file)
    # self.filenames = [self.filenames[i] for i in np.where(self.split_mask)[0]]
    self.ind_to_classes, self.ind_to_predicates = load_info(dict_file)

    if use_proposals:
        print("Loading proposals", flush=True)
        p_h5 = h5py.File(PROPOSAL_FN, 'r')
        rpn_rois = p_h5['rpn_rois']
        rpn_scores = p_h5['rpn_scores']
        rpn_im_to_roi_idx = np.array(p_h5['im_to_roi_idx'][self.split_mask])
        rpn_num_rois = np.array(p_h5['num_rois'][self.split_mask])

        self.rpn_rois = []
        for i in range(len(self.filenames)):
            rpn_i = np.column_stack((
                rpn_scores[rpn_im_to_roi_idx[i]:rpn_im_to_roi_idx[i] + rpn_num_rois[i]],
                rpn_rois[rpn_im_to_roi_idx[i]:rpn_im_to_roi_idx[i] + rpn_num_rois[i]],
            ))
            self.rpn_rois.append(rpn_i)
    else:
        self.rpn_rois = None

    # You could add data augmentation here. But we didn't.
    # tform = []
    # if self.is_train:
    #     tform.append(RandomOrder([
    #         Grayscale(),
    #         Brightness(),
    #         Contrast(),
    #         Sharpness(),
    #         Hue(),
    #     ]))

    tform = [
        SquarePad(),
        Resize(IM_SCALE),
        ToTensor(),
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
    self.transform_pipeline = Compose(tform)
                    metavar='N', help='how many batches to wait before logging training status')
args = parser.parse_args()
# args.cuda = not args.no_cuda and torch.cuda.is_available()
args.cuda = False

if args.cuda:
    torch.cuda.manual_seed(args.seed)

receptive_filter_size = 4
hidden_size = 320
image_size_w = 32
image_size_h = 32

input_transform = Compose([
    Resize((32, 32)),
    ToTensor(),
    Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])
target_transform = Compose([
    Resize((32, 32)),
    ToLabel(),
])

# trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
#                                         download=True, transform=transform)
# trainloader = torch.utils.data.DataLoader(trainset, batch_size=args.batch_size,
#                                           shuffle=True, num_workers=2)

trainloader = DataLoader(train(input_transform, target_transform),
                         num_workers=1, batch_size=1,
from PIL import Image


class Faces(Dataset):
    def __init__(self, root, transform=None):
        self.root = root
        self.transform = transform
        self.images = glob.glob(os.path.join(root, '*.jpg'))

    def __getitem__(self, index):
        image = Image.open(self.images[index]).convert('RGB')
        if self.transform is not None:
            image = self.transform(image)
        return image

    def __len__(self):
        return len(self.images)


if __name__ == '__main__':
    from torchvision.transforms import Compose, Resize, CenterCrop, ToTensor, Normalize

    transform = Compose([
        Resize(64),
        CenterCrop(64),
        ToTensor(),
        Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])
    root = '/home/zsh_o/work/data/faces/'
    dataset = Faces(root=root, transform=transform)
    print(dataset[0])
def __call__(self, input, target):
    # Apply the same resizing to both images
    input = Resize(self.height, Image.BILINEAR)(input)
    target = Resize(self.height, Image.NEAREST)(target)

    if (self.augment):
        # Random hflip
        hflip = random.random()
        if (hflip < 0.5):
            input = input.transpose(Image.FLIP_LEFT_RIGHT)
            target = target.transpose(Image.FLIP_LEFT_RIGHT)

        # Random translation of 0-2 pixels (fill the rest with padding)
        transX = random.randint(-2, 2)
        transY = random.randint(-2, 2)

        input = ImageOps.expand(input, border=(transX, transY, 0, 0), fill=0)
        target = ImageOps.expand(target, border=(transX, transY, 0, 0), fill=255)  # pad label filling with 255
        input = input.crop((0, 0, input.size[0] - transX, input.size[1] - transY))
        target = target.crop((0, 0, target.size[0] - transX, target.size[1] - transY))

    input = ToTensor()(input)
    if (self.enc):
        target = Resize(int(self.height / 8), Image.NEAREST)(target)
    target = ToLabel()(target)
    target = Relabel(255, 19)(target)

    return input, target
def main():
    # See all possible arguments in src/transformers/training_args.py
    # or by passing the --help flag to this script.
    # We now keep distinct sets of args, for a cleaner separation of concerns.
    parser = HfArgumentParser((ModelArguments, DataTrainingArguments, TrainingArguments))
    if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
        # If we pass only one argument to the script and it's the path to a json file,
        # let's parse it to get our arguments.
        model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1]))
    else:
        model_args, data_args, training_args = parser.parse_args_into_dataclasses()

    # Setup logging
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        handlers=[logging.StreamHandler(sys.stdout)],
    )

    log_level = training_args.get_process_log_level()
    logger.setLevel(log_level)
    transformers.utils.logging.set_verbosity(log_level)
    transformers.utils.logging.enable_default_handler()
    transformers.utils.logging.enable_explicit_format()

    # Log the small summary on each process:
    logger.warning(
        f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}, "
        + f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}"
    )
    logger.info(f"Training/evaluation parameters {training_args}")

    # Detecting last checkpoint.
    last_checkpoint = None
    if os.path.isdir(training_args.output_dir) and training_args.do_train and not training_args.overwrite_output_dir:
        last_checkpoint = get_last_checkpoint(training_args.output_dir)
        if last_checkpoint is None and len(os.listdir(training_args.output_dir)) > 0:
            raise ValueError(
                f"Output directory ({training_args.output_dir}) already exists and is not empty. "
                "Use --overwrite_output_dir to overcome."
            )
        elif last_checkpoint is not None and training_args.resume_from_checkpoint is None:
            logger.info(
                f"Checkpoint detected, resuming training at {last_checkpoint}. To avoid this behavior, change "
                "the `--output_dir` or add `--overwrite_output_dir` to train from scratch."
            )

    # Initialize our dataset and prepare it for the 'image-classification' task.
    ds = load_dataset(
        data_args.dataset_name,
        data_args.dataset_config_name,
        data_files=data_args.data_files,
        cache_dir=model_args.cache_dir,
        task="image-classification",
    )

    # Define torchvision transforms to be applied to each image.
    normalize = Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
    _train_transforms = Compose(
        [
            RandomResizedCrop(data_args.image_size),
            RandomHorizontalFlip(),
            ToTensor(),
            normalize,
        ]
    )
    _val_transforms = Compose(
        [
            Resize(data_args.image_size),
            CenterCrop(data_args.image_size),
            ToTensor(),
            normalize,
        ]
    )

    def train_transforms(example_batch):
        """Apply _train_transforms across a batch."""
        example_batch["pixel_values"] = [
            _train_transforms(pil_loader(f)) for f in example_batch["image_file_path"]
        ]
        return example_batch

    def val_transforms(example_batch):
        """Apply _val_transforms across a batch."""
        example_batch["pixel_values"] = [
            _val_transforms(pil_loader(f)) for f in example_batch["image_file_path"]
        ]
        return example_batch

    # If we don't have a validation split, split off a percentage of train as validation.
    data_args.train_val_split = None if "validation" in ds.keys() else data_args.train_val_split
    if isinstance(data_args.train_val_split, float) and data_args.train_val_split > 0.0:
        split = ds["train"].train_test_split(data_args.train_val_split)
        ds["train"] = split["train"]
        ds["validation"] = split["test"]

    # Prepare label mappings.
    # We'll include these in the model's config to get human readable labels in the Inference API.
    labels = ds["train"].features["labels"].names
    label2id, id2label = dict(), dict()
    for i, label in enumerate(labels):
        label2id[label] = str(i)
        id2label[str(i)] = label

    # Load the accuracy metric from the datasets package
    metric = datasets.load_metric("accuracy")

    # Define our compute_metrics function. It takes an `EvalPrediction` object (a namedtuple with a
    # predictions and label_ids field) and has to return a dictionary string to float.
    def compute_metrics(p):
        """Computes accuracy on a batch of predictions"""
        return metric.compute(predictions=np.argmax(p.predictions, axis=1), references=p.label_ids)

    config = AutoConfig.from_pretrained(
        model_args.config_name or model_args.model_name_or_path,
        num_labels=len(labels),
        label2id=label2id,
        id2label=id2label,
        finetuning_task="image-classification",
        cache_dir=model_args.cache_dir,
        revision=model_args.model_revision,
        use_auth_token=True if model_args.use_auth_token else None,
    )
    model = AutoModelForImageClassification.from_pretrained(
        model_args.model_name_or_path,
        from_tf=bool(".ckpt" in model_args.model_name_or_path),
        config=config,
        cache_dir=model_args.cache_dir,
        revision=model_args.model_revision,
        use_auth_token=True if model_args.use_auth_token else None,
    )

    # NOTE - We aren't directly using this feature extractor since we defined custom transforms above.
    # We initialize this instance below and pass it to Trainer to ensure that the feature extraction
    # config, preprocessor_config.json, is included in output directories.
    # This way if we push a model to the hub, the inference widget will work.
    feature_extractor = AutoFeatureExtractor.from_pretrained(
        model_args.feature_extractor_name or model_args.model_name_or_path,
        cache_dir=model_args.cache_dir,
        revision=model_args.model_revision,
        use_auth_token=True if model_args.use_auth_token else None,
        size=data_args.image_size,
        image_mean=normalize.mean,
        image_std=normalize.std,
    )

    if training_args.do_train:
        if "train" not in ds:
            raise ValueError("--do_train requires a train dataset")
        if data_args.max_train_samples is not None:
            ds["train"] = ds["train"].shuffle(seed=training_args.seed).select(range(data_args.max_train_samples))
        # Set the training transforms
        ds["train"].set_transform(train_transforms)

    if training_args.do_eval:
        if "validation" not in ds:
            raise ValueError("--do_eval requires a validation dataset")
        if data_args.max_eval_samples is not None:
            ds["validation"] = (
                ds["validation"].shuffle(seed=training_args.seed).select(range(data_args.max_eval_samples))
            )
        # Set the validation transforms
        ds["validation"].set_transform(val_transforms)

    # Initialize our trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=ds["train"] if training_args.do_train else None,
        eval_dataset=ds["validation"] if training_args.do_eval else None,
        compute_metrics=compute_metrics,
        tokenizer=feature_extractor,
        data_collator=collate_fn,
    )

    # Training
    if training_args.do_train:
        checkpoint = None
        if training_args.resume_from_checkpoint is not None:
            checkpoint = training_args.resume_from_checkpoint
        elif last_checkpoint is not None:
            checkpoint = last_checkpoint
        train_result = trainer.train(resume_from_checkpoint=checkpoint)
        trainer.save_model()
        trainer.log_metrics("train", train_result.metrics)
        trainer.save_metrics("train", train_result.metrics)
        trainer.save_state()

    # Evaluation
    if training_args.do_eval:
        metrics = trainer.evaluate()
        trainer.log_metrics("eval", metrics)
        trainer.save_metrics("eval", metrics)

    # Write model card and (optionally) push to hub
    kwargs = {
        "finetuned_from": model_args.model_name_or_path,
        "tasks": "image-classification",
        "dataset": data_args.dataset_name,
        "tags": ["image-classification"],
    }
    if training_args.push_to_hub:
        trainer.push_to_hub(**kwargs)
    else:
        trainer.create_model_card(**kwargs)
def train_panet(device, resume=False, dataset_name='voc'):
    pre_trained_encoder_path = '../data/vgg16-397923af.pth' if cfg['panet']['use_pretrained'] else None
    model = PANetFewShotSeg(in_channels=cfg[dataset_name]['channels'],
                            pretrained_path=pre_trained_encoder_path,
                            cfg={'align': True},
                            encoder_type=cfg['panet']['backbone']).to(device)

    optimizer = torch.optim.SGD(model.parameters(),
                                lr=cfg['panet']['lr'],
                                momentum=cfg['panet']['momentum'],
                                weight_decay=cfg['panet']['weight_decay'])
    scheduler = MultiStepLR(optimizer, milestones=cfg['panet']['lr_milestones'], gamma=0.1)

    epoch = 0
    model.train()
    if resume:
        epoch = load_state(cfg[dataset_name]['model_name'], model, optimizer, scheduler)

    if dataset_name == 'voc':
        transforms = Compose([
            Resize(size=cfg['panet']['vgg_inp_size']),
        ])
    elif dataset_name == 'ircadb':
        transforms = Compose([
            Resize(size=cfg['panet']['unet_inp_size']),
        ])

    if dataset_name == 'voc':
        train_dataset = get_pascal_few_shot_datasets(
            range(1, 16), cfg['panet']['train_iterations'],
            cfg['nshot'], cfg['nquery'], transforms)
    elif dataset_name == 'ircadb':
        train_dataset = get_ircadb_few_shot_datasets(
            organs=["bone", "spleen", "leftkidney", "rightkidney",
                    "leftlung", "rightlung", "gallbladder"],
            patient_ids=range(1, 16),
            iterations=cfg['panet']['train_iterations'],
            N_shot=cfg['nshot'],
            N_query=cfg['nquery'],
            transforms=transforms)

    trainloader = DataLoader(train_dataset, batch_size=1, shuffle=True,
                             num_workers=1, pin_memory=True, drop_last=True)

    criterion = nn.CrossEntropyLoss(ignore_index=255)

    log_loss = {'loss': 0, 'align_loss': 0}
    for i_iter, (support, query) in enumerate(tqdm(trainloader)):
        support_images = [[]]
        support_fg_mask = [[]]
        support_bg_mask = [[]]
        for i in range(len(support)):
            # print(support[i][0].shape)
            support_images[0].append(support[i][0].to(device))
            support_fg_mask[0].append(support[i][1].to(device))
            support_bg_mask[0].append(support[i][2].to(device))

        query_images = []
        query_labels = []
        for i in range(len(query)):
            query_images.append(query[i][0].to(device))
            query_labels.append(query[i][1].to(device))
        query_labels = torch.cat(query_labels, dim=0).long().to(device)

        # Forward and Backward
        optimizer.zero_grad()
        query_pred, align_loss = model(support_images, support_fg_mask,
                                       support_bg_mask, query_images)
        query_loss = criterion(query_pred, query_labels)
        loss = query_loss + align_loss * cfg['panet']['align_loss_scalar']
        loss.backward()
        optimizer.step()
        scheduler.step()

        # Log loss
        query_loss = query_loss.detach().data.cpu().numpy()
        align_loss = align_loss.detach().data.cpu().numpy() if align_loss != 0 else 0
        log_loss['loss'] += query_loss
        log_loss['align_loss'] += align_loss

        # Print loss and take snapshots
        if (i_iter + 1) % cfg['panet']['save_period'] == 0:
            loss = log_loss['loss'] / (i_iter + 1)
            align_loss = log_loss['align_loss'] / (i_iter + 1)
            print('\nstep {}: loss: {}, align_loss: {}'.format(i_iter + 1, loss, align_loss))

        if (i_iter + 1) % cfg['panet']['save_period'] == 0:
            save_state(cfg[dataset_name]['model_name'], model, optimizer,
                       scheduler, epoch + i_iter + 1)
            print("\nModel Saved On Iteration {} ...".format(epoch + i_iter + 1))

    return model
def train_lr_transform(crop_size, upscale_factor):
    return Compose([
        ToPILImage(),
        Resize(crop_size // upscale_factor, interpolation=Image.BICUBIC),
        ToTensor()
    ])
# Losses
reconstruction_loss = None
if ARGUMENTS.loss == 'l1':
    reconstruction_loss = nn.L1Loss().to(device)
elif ARGUMENTS.loss == 'lpips':
    reconstruction_loss = LPIPS(device).to(device)

envmap_loss = None
if envmap_colorspace == 'rgb':
    envmap_loss = log_l2_loss
else:
    envmap_loss = hsv_envmap_loss

# Configure data sets
transform = Resize(SIZE)
pairing_strategies = [DifferentScene(), DifferentLightDirection()]
train_dataset = InputTargetGroundtruthWithGeneratedEnvmapDataset(
    transform=transform,
    pairing_strategies=pairing_strategies,
    mode=envmap_colorspace)
test_dataset = InputTargetGroundtruthWithGeneratedEnvmapDataset(
    data_path=VALIDATION_DATA_PATH,
    transform=transform,
    pairing_strategies=pairing_strategies,
    mode=envmap_colorspace)

# Configure data loaders
# Sub-sampling:
# https://discuss.pytorch.org/t/train-on-a-fraction-of-the-data-set/16743/2
# https://discuss.pytorch.org/t/torch-equivalent-of-numpy-random-choice/16146/5
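# --- Hedged sketch (assumption, not the original code): sub-sampling a fraction of the
# training set with SubsetRandomSampler, in the spirit of the threads linked above;
# the 10% fraction, batch size, and variable names are illustrative.
from torch.utils.data import DataLoader, SubsetRandomSampler

subset_fraction = 0.1
subset_size = int(subset_fraction * len(train_dataset))
subset_indices = torch.randperm(len(train_dataset))[:subset_size]
example_train_loader = DataLoader(train_dataset, batch_size=16,
                                  sampler=SubsetRandomSampler(subset_indices))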
def valid_hr_transform(shape, upscale_factor):
    return Compose([
        ToTensor(),
        Resize((shape[0] // upscale_factor, shape[1] // upscale_factor),
               interpolation=Image.BICUBIC)
    ])
def main():
    args = arguments()

    if torch.cuda.is_available():
        device = torch.device("cuda:0")  # could also be cuda:1, cuda:2, ... etc.
        print("Running on the GPU")
    else:
        device = torch.device("cpu")
        print("Running on the CPU")

    transforms = Compose([Resize((50, 50)), ToTensor()])
    dataset = ImageFolder("Data_WetSeason", transform=transforms)
    testset = ImageFolder("Test_WetSeason", transform=transforms)
    INPUT_SIZE = dataset[0][0].shape

    """
    train_val_len = int(0.9 * len(dataset))
    test_len = int(len(dataset) - train_val_len)
    train_len = int(0.8 * 0.9 * len(dataset))
    val_len = int(len(dataset) - test_len - train_len)
    """
    """
    train_len = int(0.7 * len(dataset))
    val_len = int(0.1 * len(dataset))
    test_len = int(len(dataset) - train_len - val_len)
    # train, test = random_split(dataset, lengths=(train_len, test_len))
    train, validation, test = random_split(dataset, lengths=(train_len, val_len, test_len))
    train_loader = DataLoader(train, batch_size=TRAIN_BATCH_SIZE, shuffle=True)
    val_loader = DataLoader(validation, batch_size=VAL_BATCH_SIZE, shuffle=False)
    test_loader = DataLoader(test, batch_size=TEST_BATCH_SIZE, shuffle=False)
    # prediction_loader = DataLoader(dataset, batch_size=PRED_BATCH_SIZE)
    """

    train_len = int(0.8 * len(dataset))
    val_len = int(len(dataset) - train_len)
    train, val = random_split(dataset, lengths=(train_len, val_len))
    train_loader = DataLoader(train, batch_size=args.train_batch_size, shuffle=True)
    val_loader = DataLoader(val, batch_size=args.val_batch_size, shuffle=False)
    prediction_loader = DataLoader(testset, batch_size=args.pred_batch_size)

    net = Net(INPUT_SIZE).to(device)
    optimizer = optim.Adam(net.parameters(), lr=0.001)
    loss_function = nn.CrossEntropyLoss()

    # with open("CNN_model.log", "a") as f:
    for epoch in range(args.epochs):
        net.train()
        sum_acc = 0
        for x, y in train_loader:
            x = x.to(device)
            y = y.to(device)
            acc, loss = step(x, y, net=net, optimizer=optimizer,
                             loss_function=loss_function, train=True)
            sum_acc += acc
        train_avg_acc = sum_acc / len(train_loader)
        print(f"Training accuracy: {train_avg_acc:.2f}")

        net.eval()
        sum_acc = 0
        for x, y in val_loader:
            x = x.to(device)
            y = y.to(device)
            val_acc, val_loss = step(x, y, net=net, optimizer=optimizer,
                                     loss_function=loss_function, train=False)
            sum_acc += val_acc
        val_avg_acc = sum_acc / len(val_loader)
        print(f"Validation accuracy: {val_avg_acc:.2f}")

        train_steps = len(train_loader) * (epoch + 1)
        wandb.log({"Train Accuracy": train_avg_acc,
                   "Validation Accuracy": val_avg_acc}, step=train_steps)

    # train_preds = get_all_preds(net, test_loader)
    train_preds = get_all_preds(net, loader=prediction_loader, device=device)
    plt.figure(figsize=(10, 10))
    wandb.sklearn.plot_confusion_matrix(testset.targets, train_preds.argmax(dim=1), LABELS)
    precision, recall, f1_score, support = score(testset.targets, train_preds.argmax(dim=1))
    test_acc = accuracy_score(testset.targets, train_preds.argmax(dim=1))
    print(f"Test Accuracy: {test_acc}")
    print('precision: {}'.format(precision))
    print('recall: {}'.format(recall))
    print('f1_score: {}'.format(f1_score))
    print('support: {}'.format(support))
def __init__(self, base_path, data_args, data_split, sample_rate,
             max_sample_size=None, min_sample_size=None, shuffle=True):
    super().__init__()

    self.data_args = data_args
    self.sample_rate = sample_rate
    self.fnames_audio = []
    self.fnames_text = []
    self.fnames_video = []
    self.sizes_audio = []
    self.sizes_video = []
    self.labels = {}

    ##### Video frames #####
    self.channels = 3
    self.timeDepth = 300
    self.xSize = 256
    self.ySize = 256
    IMAGE_SIZE = (self.xSize, self.ySize)
    self.transform = Compose([Resize(IMAGE_SIZE), ToTensor()])

    self.max_sample_size = max_sample_size if max_sample_size is not None else sys.maxsize
    self.min_sample_size = min_sample_size if min_sample_size is not None else self.max_sample_size

    self.base_manifest_path = base_path
    self.split = data_split

    if self.data_args.binary_target_iemocap:
        # 'exc' excluded; IEMOCAP has up to 5 emotions, only 4 used in prior work
        included_emotions = ['neu', 'ang', 'sad', 'hap']
    elif self.data_args.softmax_target_meld:
        print("We are using MELD for the softmax classification")
        included_emotions = ['neutral', 'sadness', 'surprise', 'joy', 'anger', 'fear', 'disgust']  # MELD (max 7 emotions)
        # included_emotions = ['neutral', 'sadness', 'surprise', 'joy', 'anger']
    elif self.data_args.softmax_target_binary_meld:
        included_emotions = ['neutral', 'sadness', 'surprise', 'joy', 'anger', 'fear', 'disgust']  # MELD (max 7 emotions)
    else:
        print("We are using MOSEI or MOSI to do a regression task")

    manifest_audio = os.path.join(self.base_manifest_path, '{}.tsv'.format(self.split + "_a"))
    manifest_text = os.path.join(self.base_manifest_path, '{}.tsv'.format(self.split + "_t"))
    manifest_video = os.path.join(self.base_manifest_path, '{}.tsv'.format(self.split + "_v"))
    manifest_label = os.path.join(self.base_manifest_path, '{}.csv'.format("label_file_" + self.split))

    with open(manifest_label, 'r') as f_l:
        self.root_dir_l = f_l.readline().strip()
        for line_l in f_l:
            items_l = line_l.strip().split(',')
            if self.data_args.regression_target_mos:
                self.labels[items_l[0].strip()] = np.round(float(items_l[1].strip()), decimals=4)
            else:
                # For the sentiment target, use index 2 from the list; otherwise index 1
                self.labels[items_l[0].strip()] = items_l[1].strip()

    # inter_n = 0
    with open(manifest_audio, 'r') as f_a, open(manifest_text, 'r') as f_t, open(manifest_video, 'r') as f_v:
        self.root_dir_a = f_a.readline().strip()
        self.root_dir_t = f_t.readline().strip()
        self.root_dir_v = f_v.readline().strip()

        for line_a, line_t, line_v in zip(f_a, f_t, f_v):
            items_a = line_a.strip().split('\t')
            items_t = line_t.strip().split('\t')
            items_v = line_v.strip().split('\t')
            # inter_n = inter_n + 1
            # if inter_n > 5:
            #     break
            assert items_a[0].split('.')[0] == items_t[0].split('.')[0] == items_v[0].split('.')[0], \
                "misalignment of data"

            # If the label is not there, .get() returns None
            emotion = self.labels.get(items_v[0].split('.')[0])

            if self.data_args.regression_target_mos:
                if self.data_args.eval_matric:
                    if emotion == 0.0:
                        continue
                self.fnames_audio.append(items_a[0])
                self.fnames_text.append(items_t[0])
                self.fnames_video.append(items_v[0])
                self.sizes_audio.append(1000000)  # used in the data loader's np.lexsort, but could be removed
                self.sizes_video.append(1000000)
            else:
                if emotion in included_emotions:  # only use the subset of emotions
                    self.fnames_audio.append(items_a[0])
                    self.fnames_text.append(items_t[0])
                    self.fnames_video.append(items_v[0])
                    self.sizes_audio.append(1000000)
                    self.sizes_video.append(1000000)

    if self.data_args.binary_target_iemocap:
        self.emotion_dictionary = {  # IEMOCAP
            'neu': 0,
            'ang': 2,
            'hap': 3,
            'sad': 1,
            # 'exc': 3
        }
    if self.data_args.softmax_target_meld:
        self.emotion_dictionary = {  # MELD
            'anger': 2,
            'joy': 3,
            'neutral': 0,
            'sadness': 1,
            'surprise': 4,
            'fear': 5,
            'disgust': 6
        }
        # self.emotion_dictionary = {  # MELD
        #     'anger': 2,
        #     'joy': 3,
        #     'neutral': 0,
        #     'sadness': 1,
        #     'surprise': 4,
        #     # 'fear': 5,
        #     # 'disgust': 6
        # }
    if self.data_args.regression_target_mos:
        self.emotion_dictionary = {  # MOSEI sentiment
            '-3': 6,
            '-2': 5,
            '-1': 4,
            '0': 0,
            '1': 1,
            '2': 2,
            '3': 3
        }
        # self.emotion_dictionary = {  # MOSEI sentiment, 2 class
        #     '0': 0,
        #     '1': 1
        # }

    self.shuffle = shuffle
def __call__(self, input, target):
    # Apply the same resizing to both images
    input = Scale(self.height, Image.BILINEAR)(input)
    target = Scale(self.height, Image.NEAREST)(target)

    if (self.augment):
        # Random hflip
        hflip = random.random()
        if (hflip < 0.5):
            input = input.transpose(Image.FLIP_LEFT_RIGHT)
            target = target.transpose(Image.FLIP_LEFT_RIGHT)

        # Random rotation
        degree = random.randint(-20, 20)
        input = input.rotate(degree, resample=Image.BILINEAR, expand=True)
        target = target.rotate(degree, resample=Image.NEAREST, expand=True)

        # Random crop of a sub-window, then resize back to the working resolution
        w, h = input.size
        nratio = random.uniform(0.5, 1.0)
        ni = random.randint(0, int(h - nratio * h))
        nj = random.randint(0, int(w - nratio * w))
        input = input.crop((nj, ni, int(nj + nratio * w), int(ni + nratio * h)))
        target = target.crop((nj, ni, int(nj + nratio * w), int(ni + nratio * h)))
        input = Resize((480, 640), Image.BILINEAR)(input)
        target = Resize((480, 640), Image.NEAREST)(target)

        # Random color jitter (brightness, contrast, saturation, hue) on the input only
        brightness_factor = random.uniform(0.8, 1.2)
        contrast_factor = random.uniform(0.8, 1.2)
        saturation_factor = random.uniform(0.8, 1.2)
        # sharpness_factor = random.uniform(0.0, 2.0)
        hue_factor = random.uniform(-0.2, 0.2)

        enhancer1 = ImageEnhance.Brightness(input)
        input = enhancer1.enhance(brightness_factor)
        enhancer2 = ImageEnhance.Contrast(input)
        input = enhancer2.enhance(contrast_factor)
        enhancer3 = ImageEnhance.Color(input)
        input = enhancer3.enhance(saturation_factor)
        # enhancer4 = ImageEnhance.Sharpness(input)
        # input = enhancer4.enhance(sharpness_factor)

        # Hue shift in HSV space
        input_mode = input.mode
        h, s, v = input.convert('HSV').split()
        np_h = np.array(h, dtype=np.uint8)
        with np.errstate(over='ignore'):
            np_h += np.uint8(hue_factor * 255)
        h = Image.fromarray(np_h, 'L')
        input = Image.merge('HSV', (h, s, v)).convert(input_mode)
    else:
        input = Resize((480, 640), Image.BILINEAR)(input)
        target = Resize((480, 640), Image.NEAREST)(target)

    input = ToTensor()(input)
    if (self.enc):
        target = Resize((60, 80), Image.NEAREST)(target)
    target = ToLabel()(target)
    target = Relabel(255, 27)(target)

    return input, target
cuda_available = torch.cuda.is_available()
if cuda_available:
    print("CUDA available")
else:
    print("CUDA not available")
device = torch.device("cuda:0" if cuda_available else "cpu")

# DATA
SPLIT_RATIO: float = 0.7
BATCH_SIZE: int = 32
NUM_EPOCHS: int = 15

movie_data_set: MovieSuccessDataset = MovieSuccessDataset(
    MOVIE_DATA_FILE,
    POSTERS_DIR,
    Dictionary(DATA_DIR / 'dict2000.json'),
    Compose([Resize((299, 299)), ToTensor()]))
data_set_size: int = len(movie_data_set)
print(f'Size of the data-set: {data_set_size}')

train_data_set_size: int = int(data_set_size * SPLIT_RATIO)
val_data_set_size: int = data_set_size - train_data_set_size
train_dataset, val_dataset = torch.utils.data.random_split(
    movie_data_set, [train_data_set_size, val_data_set_size])

weights: np.ndarray = get_class_weights(train_dataset)
weighted_sampler = torch.utils.data.sampler.WeightedRandomSampler(weights, len(weights))
train_data_set_loader: DataLoader = DataLoader(train_dataset, batch_size=BATCH_SIZE,
if opts.model == 'siamese':
    model = siamese(opts.input_channels)
elif opts.model == 'cnn_pairwise':
    model = CnnPairwise(opts.input_channels)
elif opts.model == 'CRFN':
    model = CRFN(opts.input_channels)
model = model.to(device)

contrastive = ContrastiveLoss(margin=opts.margin)
CE = torch.nn.BCEWithLogitsLoss()

optimizer = torch.optim.Adam(model.parameters(), lr=opts.lr)
optimizer.zero_grad()

transform = Compose([Resize(105), ToTensor()])
train_pairs = PairedImagesDataset(data_path=opts.train_data, size=40000,
                                  transform=transform,
                                  enable_fake_pairs=opts.enable_fake_pairs)
val_pairs = PairedImagesDataset(data_path=opts.val_data, size=10000,
                                transform=transform,
                                enable_fake_pairs=opts.enable_fake_pairs)

train_pairs_loader = DataLoader(dataset=train_pairs, batch_size=opts.batch_size,
                                shuffle=True)
val_pairs_loader = DataLoader(dataset=val_pairs, batch_size=opts.batch_size,
def __init__(self):
    self.args = args
    self.input_transform = Compose([
        Resize((512, 512)),
        # Resize((resize, resize)),
        ToTensor(),
        Normalize([.485, .456, .406], [.229, .224, .225])
    ])
    self.label_transform = Compose([Resize((512, 512)), ToLabel(), Relabel()])

    # self.net = model().cuda()
    # Use this Dense-ResNet variant for the correlation model
    self.net = crate_Den_Resnet_model().cuda()
    # self.net = UnetResNetaddbn().cuda()
    # self.net = UnetResNet_dropout().cuda()
    # self.net = model_siamese_addbn_advance().cuda()
    # self.net = model_siamese_addbn_pooling_sort().cuda()
    # self.net = model_siamese_addbn_advance_pooling().cuda()
    # self.net = model_siamese_addbn_w_x_pooling().cuda()

    # checkpoint = torch.load(self.args.model_path)
    # self.net.load_state_dict(checkpoint, strict=True)
    # self.net = torch.load(self.args.model_path).cuda()
    # checkpoint = torch.load('/home/gongxp/mlmr/githubcode/siamase_pytorch/resnet50_origin.pth')
    # self.net.load_state_dict(checkpoint, strict=False)

    self.train_data_loader = DataLoader(
        coseg_train_dataset(self.args.train_data, self.args.train_label,
                            self.args.train_txt, self.input_transform, self.label_transform),
        num_workers=self.args.num_worker,
        batch_size=self.args.batch_size,
        shuffle=True)
    self.val_data_loader = DataLoader(
        coseg_val_dataset(self.args.val_data, self.args.val_label,
                          self.args.val_txt, self.input_transform, self.label_transform),
        num_workers=self.args.num_worker,
        batch_size=self.args.batch_size,
        shuffle=False)

    self.optimizer = optim.Adam(self.net.parameters(), lr=self.args.lr,
                                weight_decay=self.args.weight_decay)
    # self.optimizer = optim.SGD(self.net.parameters(), lr=self.args.lr, momentum=0.9,
    #                            nesterov=True, weight_decay=self.args.weight_decay)

    self.steps_per_epoch = int(np.ceil(get_file_len(self.args.train_txt) / float(self.args.batch_size)))
    self.scheduler = torch.optim.lr_scheduler.StepLR(
        self.optimizer, step_size=self.steps_per_epoch * 2, gamma=0.75)

    self.loss_func = nn.CrossEntropyLoss()
    self.focal_loss = FocalLoss2d()  # error
    self.BCEsoftJaccarddice = BCESoftJaccardDice()  # error
    self.BCESoftJaccarddice_rate_change = BCESoftJaccardDiceRateChange()  # error
    # self.dice_loss = DiceLoss()
    # summary(self.net, [(3, 512, 512), (3, 512, 512)])
def __init__(self, model, emotions, img_size=(224, 224), device=None):
    self.emotions = emotions
    self.transform = Compose([Resize(img_size), Grayscale(1), ToTensor()])
    self.device = torch.device('cpu') if device is None else device
    self.model = model.eval().to(self.device)
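# --- Hedged sketch (assumption): a companion predict method this class could expose, built
# only on the transform/model/device fields initialized above; the softmax over per-emotion
# logits is an assumption about the model's output shape.
def predict(self, pil_image):
    x = self.transform(pil_image).unsqueeze(0).to(self.device)  # (1, 1, H, W)
    with torch.no_grad():
        probs = torch.softmax(self.model(x), dim=1).squeeze(0)
    return {emotion: float(p) for emotion, p in zip(self.emotions, probs)}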
    gallery_labels = torch.cat(gallery_labels, dim=0).numpy()
    gallery_cams = torch.cat(gallery_cams, dim=0).numpy()

    Cmc, mAP = Video_Cmc(gallery_features, gallery_labels, gallery_cams,
                         dataloader.dataset.query_idx, 10000)
    network.train()

    return Cmc[0:20], mAP


if __name__ == '__main__':
    # Parse args
    args = parser.parse_args()

    # Set transformation (H flip is inside dataset)
    train_transform = Compose([
        Resize((256, 128)),
        ToTensor(),
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
    test_transform = Compose([
        Resize((256, 128)),
        ToTensor(),
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    print('Start dataloader...')
    train_dataloader = utils.Get_Video_train_DataLoader(
        args.train_txt, args.train_info, train_transform, shuffle=True,
        num_workers=args.num_workers, S=args.S,
        track_per_class=args.track_per_class, class_per_batch=args.class_per_batch)
    num_class = train_dataloader.dataset.n_id
    test_dataloader = utils.Get_Video_test_DataLoader(
        args.test_txt, args.test_info, args.query_info, test_transform,
        batch_size=args.batch_size, shuffle=False, num_workers=args.num_workers,
        S=args.S, distractor=True)
    print('End dataloader... n_id', num_class)
        self.transforms = transforms

    def __call__(self, img, seed, mask_flag=False):
        for t in self.transforms:
            # Skip color jitter for masks; re-seed the RNG before every transform so the
            # image and its mask receive identical random parameters
            if isinstance(t, ColorJitter) and mask_flag:
                continue
            random.seed(seed)
            img = t(img)
        return img


    train_img_aug = Compose_own([
        RandomAffine(90, shear=45),
        RandomRotation(90),
        RandomHorizontalFlip(),
        ColorJitter(),
        Resize(size=(img_size, img_size)),
        ToTensor()])
    train_mask_aug = Compose_own([
        Resize(size=(img_size, img_size)),
        ToTensor()])

    def __getitem__(self, index):
        img = Image.open(self.data[index]).convert('RGB')
        target = Image.open(self.data_labels[index])

        seed = np.random.randint(1000000)  # make a seed with the numpy generator
        random.seed(seed)  # apply this seed to the image transforms
        if self.transform is not None:
            img = self.transform(img)
def test_panet(device, model=None, dataset_name='voc', test_organ='liver'):
    if model is None:
        # pretrained_path = '../data/vgg16-397923af.pth'
        model = PANetFewShotSeg(
            in_channels=cfg[dataset_name]['channels'],
            pretrained_path=None,
            cfg={'align': True},
            encoder_type=cfg['panet']['backbone']).to(device)
        load_state(cfg[dataset_name]['model_name'], model)

    model.eval()

    if dataset_name == 'voc':
        transforms = Compose([
            Resize(size=cfg['panet']['vgg_inp_size']),
        ])
    elif dataset_name == 'ircadb':
        transforms = Compose([
            Resize(size=cfg['panet']['unet_inp_size']),
        ])

    if dataset_name == 'voc':
        test_dataset = get_pascal_few_shot_datasets(
            range(16, 21), cfg['panet']['test_iterations'],
            cfg['nshot'], cfg['nquery'], transforms)
    elif dataset_name == 'ircadb':
        test_dataset = get_ircadb_few_shot_datasets(
            organs=[test_organ],
            patient_ids=range(16, 21),
            iterations=cfg['panet']['test_iterations'],
            N_shot=cfg['nshot'],
            N_query=cfg['nquery'],
            transforms=transforms)

    testloader = DataLoader(test_dataset, batch_size=1, shuffle=True,
                            num_workers=1, pin_memory=True, drop_last=True)

    metric = Metric(max_label=20, n_runs=1)
    for i_iter, (support, query) in enumerate(testloader):
        support_images = [[]]
        support_fg_mask = [[]]
        support_bg_mask = [[]]
        for i in range(len(support)):
            support_images[0].append(support[i][0].to(device))
            support_fg_mask[0].append(support[i][1].to(device))
            support_bg_mask[0].append(support[i][2].to(device))

        query_images = []
        query_labels = []
        for i in range(len(query)):
            query_images.append(query[i][0].to(device))
            query_labels.append(query[i][1].to(device))
        query_labels = torch.cat(query_labels, dim=0).long().to(device)

        query_pred, _ = model(support_images, support_fg_mask,
                              support_bg_mask, query_images)

        print("Support ", i_iter)
        for i in range(len(support)):
            plt.subplot(1, 2 * len(support), 2 * i + 1)
            try:
                plt.imshow(np.moveaxis(support[i][0].squeeze().cpu().detach().numpy(), 0, 2))
            except np.AxisError:
                plt.imshow(support[i][0].squeeze().cpu().detach().numpy())
            plt.subplot(1, 2 * len(support), 2 * i + 2)
            plt.imshow(support[i][1].squeeze())
        plt.show()

        print("Query ", i_iter)
        for i in range(len(query)):
            plt.subplot(1, 3 * len(query), 3 * i + 1)
            try:
                plt.imshow(np.moveaxis(query[i][0].squeeze().cpu().detach().numpy(), 0, 2))
            except np.AxisError:
                plt.imshow(query[i][0].squeeze().cpu().detach().numpy())
            plt.subplot(1, 3 * len(query), 3 * i + 2)
            plt.imshow(query[i][1].squeeze())
            plt.subplot(1, 3 * len(query), 3 * i + 3)
            plt.imshow(np.array(query_pred.argmax(dim=1)[i].cpu()))
            metric.record(np.array(query_pred.argmax(dim=1)[i].cpu()),
                          np.array(query_labels[i].cpu()), n_run=0)
        plt.show()

    classIoU, meanIoU = metric.get_mIoU(n_run=0)
    classIoU_binary, meanIoU_binary = metric.get_mIoU_binary(n_run=0)

    print('classIoU', classIoU.tolist())
    print('meanIoU', meanIoU.tolist())
    print('classIoU_binary', classIoU_binary.tolist())
    print('meanIoU_binary', meanIoU_binary.tolist())
    print('classIoU: {}'.format(classIoU))
    print('meanIoU: {}'.format(meanIoU))
    print('classIoU_binary: {}'.format(classIoU_binary))
    print('meanIoU_binary: {}'.format(meanIoU_binary))
def build_transforms(height, width, transforms='random_flip',
                     norm_mean=[0.485, 0.456, 0.406], norm_std=[0.229, 0.224, 0.225],
                     **kwargs):
    """Builds train and test transform functions.

    Args:
        height (int): target image height.
        width (int): target image width.
        transforms (str or list of str, optional): transformations applied to model
            training. Default is 'random_flip'.
        norm_mean (list or None, optional): normalization mean values. Default is
            ImageNet means.
        norm_std (list or None, optional): normalization standard deviation values.
            Default is ImageNet standard deviation values.
    """
    if transforms is None:
        transforms = []

    if isinstance(transforms, str):
        transforms = [transforms]

    if not isinstance(transforms, list):
        raise ValueError(
            'transforms must be a list of strings, but found to be {}'.format(type(transforms)))

    if len(transforms) > 0:
        transforms = [t.lower() for t in transforms]

    if norm_mean is None or norm_std is None:
        norm_mean = [0.485, 0.456, 0.406]  # imagenet mean
        norm_std = [0.229, 0.224, 0.225]  # imagenet std
    normalize = Normalize(mean=norm_mean, std=norm_std)

    print('Building train transforms ...')
    transform_tr = []

    print('+ resize to {}x{}'.format(height, width))
    transform_tr += [Resize((height, width))]

    if 'random_flip' in transforms:
        print('+ random flip')
        transform_tr += [RandomHorizontalFlip()]

    if 'random_crop' in transforms:
        print('+ random crop (enlarge to {}x{} and crop {}x{})'.format(
            int(round(height * 1.125)), int(round(width * 1.125)), height, width))
        transform_tr += [Random2DTranslation(height, width)]

    if 'random_patch' in transforms:
        print('+ random patch')
        transform_tr += [RandomPatch()]

    if 'color_jitter' in transforms:
        print('+ color jitter')
        transform_tr += [ColorJitter(brightness=0.2, contrast=0.15, saturation=0, hue=0)]

    print('+ to torch tensor of range [0, 1]')
    transform_tr += [ToTensor()]

    print('+ normalization (mean={}, std={})'.format(norm_mean, norm_std))
    transform_tr += [normalize]

    if 'random_erase' in transforms:
        print('+ random erase')
        transform_tr += [RandomErasing(mean=norm_mean)]

    transform_tr = Compose(transform_tr)

    print('Building test transforms ...')
    print('+ resize to {}x{}'.format(height, width))
    print('+ to torch tensor of range [0, 1]')
    print('+ normalization (mean={}, std={})'.format(norm_mean, norm_std))

    transform_te = Compose([
        Resize((height, width)),
        ToTensor(),
        normalize,
    ])

    return transform_tr, transform_te
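# --- Hedged usage sketch (assumption): building both pipelines and applying the test
# transform to a PIL image; the 256x128 person re-ID resolution and file path are illustrative.
from PIL import Image

transform_tr, transform_te = build_transforms(
    height=256, width=128, transforms=['random_flip', 'random_crop'])
img = Image.open('example.jpg').convert('RGB')  # hypothetical path
x = transform_te(img)  # float tensor of shape (3, 256, 128), ImageNet-normalized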
import torch
from PIL import Image
from torchvision.models import resnet101, resnet18, vgg16, alexnet
from torchvision.transforms import ToTensor, Resize, Compose
import matplotlib.pyplot as plt

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# Create a model
model = resnet18(pretrained=True)

cat = Image.open("/home/francesco/Documents/mirror/cat.jpg")
# Resize the image and make it a tensor
input = Compose([Resize((224, 224)), ToTensor()])(cat)
# Add 1 dim for batch
input = input.unsqueeze(0)
# Call mirror with the input and the model

layers = list(model.modules())
layer = layers[50]
print(layer)


def imshow(tensor):
    tensor = tensor.squeeze()
    img = tensor.permute(1, 2, 0).cpu().numpy()
    plt.imshow(img)
    plt.show()
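# --- Hedged sketch (assumption, not from the source): one way to visualize activations of
# the selected layer with a forward hook, reusing `model`, `layer`, and `input` from above;
# the channel index is illustrative.
activations = {}

def save_activation(module, inp, out):
    activations['feat'] = out.detach()

handle = layer.register_forward_hook(save_activation)
with torch.no_grad():
    model(input)
handle.remove()

# Show one feature map of the hooked layer's output
plt.imshow(activations['feat'][0, 0].cpu().numpy())
plt.show()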