    def __getitem__(self, index):
        img_name = self.data['full_path'][index]
        image = io.imread(img_name)
        image = self.transform(fromarray(image))
        target = self.labels[index]
        target = np.array([target])
        target = target.astype('float32').reshape(len(target), -1)
        target = torch.from_numpy(target)

        return image, target


image_size = (128, 128)

transform_train = Compose([
    Resize(image_size),
    Grayscale(),
    RandomHorizontalFlip(),
    RandomAffine(degrees=20, shear=(-0.2, 0.2, -0.2, 0.2), scale=(0.8, 1.2)),
    ToTensor()
])

transform_test = Compose([Resize(image_size), Grayscale(), ToTensor()])

xray_data_train, xray_data_test = train_test_split(xray_data,
                                                   test_size=0.2,
                                                   shuffle=False)

dataset_train = XRayDatasetFromCSV(xray_data_train, transform_train)
dataset_test = XRayDatasetFromCSV(xray_data_test, transform_test)
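A minimal sketch of how these datasets could then be batched; the batch size and worker count below are assumptions, not values taken from the original snippet.

from torch.utils.data import DataLoader

loader_train = DataLoader(dataset_train, batch_size=32, shuffle=True, num_workers=4)
loader_test = DataLoader(dataset_test, batch_size=32, shuffle=False, num_workers=4)

for images, targets in loader_train:
    # images: (B, 1, 128, 128) grayscale tensors; targets: (B, 1, 1) float32 labels
    break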
'''
Build the generator and discriminator networks
'''
Net_G = Generator()
Net_D = Discriminator()
Net_G = DataParallel(Net_G)
Net_D = DataParallel(Net_D)
if GPU_NUMS > 1:
    Net_D.cuda()
    Net_G.cuda()
G_optimizer = Adam(Net_G.parameters(), lr=LR, betas=BETAS)
D_optimizer = Adam(Net_D.parameters(), lr=LR, betas=BETAS)
'''
Data loading and preprocessing
'''
transforms = Compose([
    Resize(IMAGE_SIZE),
    CenterCrop(IMAGE_SIZE),
    ToTensor(),
    Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

dataset = ImageFolder(root='../ganData/face_gender/', transform=transforms)
train_loader = torch.utils.data.DataLoader(dataset,
                                           batch_size=BATCH_SIZE,
                                           shuffle=True)


def one_hot(target):
    y = torch.zeros(target.size()[0], 10)

    for i in range(target.size()[0]):
        y[i][target[i]] = 1

    return y
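A quick illustrative check of the helper above (the labels are arbitrary; recent PyTorch also ships torch.nn.functional.one_hot for the same conversion):

labels = torch.tensor([0, 3, 9])
encoded = one_hot(labels)  # shape (3, 10); row i has a 1 at column labels[i]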
Example #3
    def __init__(self,
                 image_paths,
                 true_bboxes,
                 playout_episode=False,
                 premasking=True,
                 mode='train',
                 max_steps_per_image=200,
                 seed=None,
                 bbox_scaling_w=0.05,
                 bbox_scaling_h=0.1,
                 bbox_transformer='base',
                 has_termination_action=True,
                 has_intermediate_reward=False,
                 ior_marker_type='cross',
                 history_length=10,
                 assessor_model=None,
                 train_assessor=False,
                 grayscale=False,
                 use_cut_area=False):
        """
        :param image_paths: The paths to the individual images
        :param true_bboxes: The true bounding boxes for each image
        :type image_paths: String or list
        :type true_bboxes: numpy.ndarray
        """
        # Determines whether the agent is training or testing
        # Optimizations can be applied during training that are not allowed for testing
        self.mode = mode
        # Factor for scaling all bounding boxes relative to their size
        self.bbox_scaling_w = bbox_scaling_w
        self.bbox_scaling_h = bbox_scaling_h
        # Whether IoR markers will be placed upfront after loading the image
        self.premasking = premasking
        # Whether an episode terminates after a single trigger or is played out until the end
        self.playout_episode = playout_episode
        # Episodes will be terminated automatically after reaching max steps
        self.max_steps_per_image = max_steps_per_image
        # Whether a termination action should be provided in the action set
        self.has_termination_action = has_termination_action
        # Whether a reward will be given for each non-trigger action based on the best gt iou
        self.has_intermediate_reward = has_intermediate_reward
        # The type of IoR marker to be used when masking trigger regions
        self.ior_marker_type = ior_marker_type
        # Length of history in state & agent model
        self.history_length = history_length
        # Whether to return grayscale, 1-channel environment images
        self.grayscale = grayscale
        # Use tightness-aware IoU for reward (incorporating cut gt)
        self.use_cut_area = use_cut_area

        # Initialize action space
        self.bbox_transformer = create_bbox_transformer(bbox_transformer)
        self.action_space = spaces.Discrete(len(self.action_set))
        if self.grayscale:
            # 450*450*1 (grayscale image) + history_length * len(action_set) (one-hot-encoded history)
            self.observation_space = spaces.Tuple([
                spaces.Box(low=0, high=256, shape=(450, 450, 1)),
                spaces.Box(low=0,
                           high=1,
                           shape=(self.history_length, len(self.action_set)))
            ])
        else:
            # 450*450*3 (RGB image) + history_length * len(action_set) (one-hot-encoded history)
            self.observation_space = spaces.Tuple([
                spaces.Box(low=0, high=256, shape=(450, 450, 3)),
                spaces.Box(low=0,
                           high=1,
                           shape=(self.history_length, len(self.action_set)))
            ])

        # Initialize dataset
        if type(image_paths) is not list:
            image_paths = [image_paths]
        self.image_paths = image_paths
        self.true_bboxes = [[TextLocEnv.to_standard_box(b) for b in bboxes]
                            for bboxes in true_bboxes]

        # For registering a handler that will be executed once after a step
        self.post_step_handler = None

        # Episode-specific

        # Image for the current episode
        self.episode_image = None
        self.current_image_index = 0
        # Ground truth bounding boxes for the current episode image
        self.episode_true_bboxes = None
        # Predicted bounding boxes for the current episode image
        self.episode_pred_bboxes = None
        # IoU values for each trigger in the current episode
        self.episode_trigger_ious = None
        # List of indices of masked bounding boxes for the current episode image
        self.episode_masked_indices = []
        # Number of trigger actions used so far
        self.num_triggers_used = 0
        # Number of episodes rolled out so far
        self.episode_count = 0
        # ID of last action taken
        self.last_action_taken = -1

        # For rendering
        self.viewer = None

        # Assessor (weak-supervision)
        self.assessor = assessor_model
        self.train_assessor = train_assessor

        self.resize = Resize((450, 450),
                             interpolation=InterpolationMode.NEAREST)

        self.seed(seed=seed)
        self.reset()
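A hedged sketch of driving this environment once constructed; the image path and box coordinates are placeholders, the import of TextLocEnv is assumed, and the loop simply takes random actions following the usual gym reset/step API.

image_paths = ['sample_page.png']        # placeholder path
true_bboxes = [[[10, 10, 60, 30]]]       # one ground-truth box per image (x1, y1, x2, y2 assumed)

env = TextLocEnv(image_paths, true_bboxes, mode='test', premasking=False)
obs = env.reset()
for _ in range(10):
    action = env.action_space.sample()   # random policy, purely for illustration
    obs, reward, done, info = env.step(action)
    if done:
        break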
Example #4
import torch
import torchvision
from torchvision.transforms import Normalize, ToTensor, Resize
from main import dataset
from fashion_model import FashionModel
from sklearn.preprocessing import MultiLabelBinarizer

torch.manual_seed(42)

if __name__ == "__main__":
    test_folder = '/media/spike/Scoob/materialist_fashion_test/'
    csv_filename = input("[?] Output csv name: ")  # '001'
    use_cuda = torch.cuda.is_available()
    image_size = 224
    scale = Resize((image_size, image_size))
    normalize = Normalize(mean=[0.485, 0.456, 0.406],
                          std=[0.229, 0.224, 0.225])
    tforms = torchvision.transforms.Compose([scale, ToTensor(), normalize])
    batch = 32
    total_images = 39706
    mf_test_set = dataset.TestMaterialistFashion(test_folder, total_images,
                                                 tforms)
    mf_test_loader = torch.utils.data.DataLoader(mf_test_set,
                                                 batch_size=batch,
                                                 shuffle=False,
                                                 num_workers=8)
    print("Size of test loader: {}".format(len(mf_test_loader)))
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model = FashionModel()
    model_state = torch.load('tmp/model_state_best.th')
Example #5
def input_transform(crop_size, upscale_factor):
    return Compose([
        CenterCrop(crop_size),
        Resize(crop_size // upscale_factor),
        ToTensor(),
    ])
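To make the pairing explicit, the sketch below (crop size, scale factor, and image path are assumptions) derives the low-resolution input from the same center crop that would serve as the high-resolution target.

from PIL import Image
from torchvision.transforms import CenterCrop, Compose, ToTensor

crop_size, upscale_factor = 128, 4                          # hypothetical values
hr_transform = Compose([CenterCrop(crop_size), ToTensor()])
lr_transform = input_transform(crop_size, upscale_factor)

img = Image.open('example.png').convert('RGB')              # placeholder path
hr = hr_transform(img)                                      # (3, 128, 128)
lr = lr_transform(img)                                      # (3, 32, 32)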
Example #6
    def initialize(self):
        # We need to initialize the model inside self.run and not self.__init__
        # to ensure that the model loads in the correct thread.
        config_path = 'expt/nytimes/9_transformer_objects/config.yaml'
        logger.info(f'Loading config from {config_path}')
        config = yaml_to_params(config_path, overrides='')
        prepare_environment(config)
        vocab = Vocabulary.from_params(config.pop('vocabulary'))
        model = Model.from_params(vocab=vocab, params=config.pop('model'))
        model = model.eval()

        model_path = 'expt/nytimes/9_transformer_objects/serialization/best.th'
        logger.info(f'Loading best model from {model_path}')
        best_model_state = torch.load(model_path,
                                      map_location=torch.device('cpu'))
        model.load_state_dict(best_model_state)

        self.model = model.to(self.device)

        logger.info('Loading roberta model.')
        roberta = torch.hub.load('pytorch/fairseq:2f7e3f3323', 'roberta.base')
        self.bpe = roberta.bpe
        self.indices = roberta.task.source_dictionary.indices

        logger.info('Loading face detection model.')
        self.mtcnn = MTCNN(keep_all=True, device=self.device)
        self.inception = InceptionResnetV1(pretrained='vggface2').eval()

        self.resnet = resnet152()
        self.resnet = self.resnet.to(self.device).eval()

        cfg = 'tell/yolov3/cfg/yolov3-spp.cfg'
        weight_path = 'data/yolov3-spp-ultralytics.pt'
        self.darknet = Darknet(cfg, img_size=416)
        attempt_download(weight_path)
        self.darknet.load_state_dict(
            torch.load(weight_path, map_location=self.device)['model'])
        self.darknet.to(self.device).eval()

        # Get names and colors
        self.names = load_classes('tell/yolov3/data/coco.names')
        random.seed(123)
        self.colors = [[random.randint(0, 255) for _ in range(3)]
                       for _ in range(len(self.names))]

        self.preprocess = Compose([
            Resize(256),
            CenterCrop(224),
            ToTensor(),
            Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ])

        data_iterator = BasicIterator(batch_size=4)
        data_iterator.index_with(model.vocab)
        self.data_iterator = data_iterator

        self.tokenizer = Tokenizer.from_params(
            config.get('dataset_reader').get('tokenizer'))

        indexer_params = config.get('dataset_reader').get('token_indexers')

        self.token_indexers = {
            k: TokenIndexer.from_params(p)
            for k, p in indexer_params.items()
        }
Example #7
"""
Check the time for processing images only
"""
# Dataset
from utils.datasets import DeepFashionDataset
from torchvision.transforms import Compose
from torchvision.transforms import Resize
from torchvision.transforms import ToTensor
from torchvision.transforms import Normalize
from config.deep_fashion import DeepFashionConfig as cfg
from torch.utils.data import DataLoader
from utils.datasets import Siamesize
from time import time
trans = Compose([
    Resize(cfg.sizes),
    ToTensor(),
    Normalize(cfg.mean, cfg.std),
])
# dataset
train_ds = DeepFashionDataset(cfg.root_dir, 'train', transform=trans)

siamese_train_ds = Siamesize(train_ds)
loader_kwargs = {
    'pin_memory': True,
    'batch_size': 100,
    'num_workers': 16,
}
s_train_loader = DataLoader(siamese_train_ds, **loader_kwargs)

device = "cuda"
for _ in range(1):
Example #8
        plt.plot(trainErrsTotal, '-', label="train total", color=(0.5, 0, 0.8))

        #plt.plot( testErrsTotal, '-', label = "test total", color = (0.5,0.8,0) )

        plt.yscale('log')
        plt.grid(True)
        plt.legend()
        plt.savefig("./errors")

    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    img_size = (100, 100)
    composed = Compose([
        ToPILImage(),
        Resize(img_size),
        RandomHorizontalFlip(),
        RandomGrayscale(p=0.5),
        RandomRotation(degrees=30, center=None),
        ToTensor(), normalize
    ])

    train_dataset = HumpbackWhaleDataset(csv_file='./train_no_nu_whales.csv',
                                         root_dir="./train",
                                         transform=composed)
    #test_dataset = TitanicDataset(csvFile = 'test.csv')

    train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                               batch_size=50,
                                               shuffle=True,
                                               num_workers=4)
Example #9
def imagenet():
    normalize = Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]
    )
    return Compose([Resize(size=(224, 224)), ToTensor(), normalize])
    def __init__(self,
                 mode,
                 roidb_file=VG_SGG_FN,
                 dict_file=VG_SGG_DICT_FN,
                 image_file=IM_DATA_FN,
                 filter_empty_rels=True,
                 num_im=-1,
                 num_val_im=5000,
                 filter_duplicate_rels=True,
                 filter_non_overlap=True,
                 use_proposals=False):
        """
        Torch dataset for VisualGenome
        :param mode: Must be train, test, or val
        :param roidb_file:  HDF5 containing the GT boxes, classes, and relationships
        :param dict_file: JSON Contains mapping of classes/relationships to words
        :param image_file: HDF5 containing image filenames
        :param filter_empty_rels: True if we filter out images without relationships between
                             boxes. One might want to set this to false if training a detector.
        :param filter_duplicate_rels: Whenever we see a duplicate relationship we'll sample instead
        :param num_im: Number of images in the entire dataset. -1 for all images.
        :param num_val_im: Number of images in the validation set (must be less than num_im
               unless num_im is -1.)
        :param use_proposals: If True, load precomputed RPN proposals from PROPOSAL_FN for each image
        """
        if mode not in ('test', 'train', 'val'):
            raise ValueError(
                "Mode must be in test, train, or val. Supplied {}".format(
                    mode))
        self.mode = mode

        # Initialize
        self.roidb_file = roidb_file
        self.dict_file = dict_file
        self.image_file = image_file
        self.filter_non_overlap = filter_non_overlap
        self.filter_duplicate_rels = filter_duplicate_rels and self.mode == 'train'

        self.split_mask, self.gt_boxes, self.gt_classes, self.relationships = load_graphs(
            self.roidb_file,
            self.mode,
            num_im,
            num_val_im=num_val_im,
            filter_empty_rels=filter_empty_rels,
            filter_non_overlap=self.filter_non_overlap and self.is_train,
        )

        self.filenames = load_image_filenames(image_file)
        # self.filenames = [self.filenames[i] for i in np.where(self.split_mask)[0]]

        self.ind_to_classes, self.ind_to_predicates = load_info(dict_file)

        if use_proposals:
            print("Loading proposals", flush=True)
            p_h5 = h5py.File(PROPOSAL_FN, 'r')
            rpn_rois = p_h5['rpn_rois']
            rpn_scores = p_h5['rpn_scores']
            rpn_im_to_roi_idx = np.array(
                p_h5['im_to_roi_idx'][self.split_mask])
            rpn_num_rois = np.array(p_h5['num_rois'][self.split_mask])

            self.rpn_rois = []
            for i in range(len(self.filenames)):
                rpn_i = np.column_stack((
                    rpn_scores[rpn_im_to_roi_idx[i]:rpn_im_to_roi_idx[i] +
                               rpn_num_rois[i]],
                    rpn_rois[rpn_im_to_roi_idx[i]:rpn_im_to_roi_idx[i] +
                             rpn_num_rois[i]],
                ))
                self.rpn_rois.append(rpn_i)
        else:
            self.rpn_rois = None

        # You could add data augmentation here. But we didn't.
        # tform = []
        # if self.is_train:
        #     tform.append(RandomOrder([
        #         Grayscale(),
        #         Brightness(),
        #         Contrast(),
        #         Sharpness(),
        #         Hue(),
        #     ]))

        tform = [
            SquarePad(),
            Resize(IM_SCALE),
            ToTensor(),
            Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ]
        self.transform_pipeline = Compose(tform)
Example #11
    metavar='N',
    help='how many batches to wait before logging training status')

args = parser.parse_args()
#args.cuda = not args.no_cuda and torch.cuda.is_available()
args.cuda = False
if args.cuda:
    torch.cuda.manual_seed(args.seed)

receptive_filter_size = 4
hidden_size = 320
image_size_w = 32
image_size_h = 32

input_transform = Compose([
    Resize((32, 32)),
    ToTensor(),
    Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])
target_transform = Compose([
    Resize((32, 32)),
    ToLabel(),
])

#trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
#                                        download=True, transform=transform)
#trainloader = torch.utils.data.DataLoader(trainset, batch_size=args.batch_size,
#                                          shuffle=True, num_workers=2)
trainloader = DataLoader(train(input_transform, target_transform),
                         num_workers=1,
                         batch_size=1,
Example #12
from PIL import Image


class Faces(Dataset):
    def __init__(self, root, transform=None):
        self.root = root
        self.transform = transform
        self.images = glob.glob(os.path.join(root, '*.jpg'))

    def __getitem__(self, index):
        image = Image.open(self.images[index]).convert('RGB')
        if self.transform is not None:
            image = self.transform(image)
        return image

    def __len__(self):
        return len(self.images)


if __name__ == '__main__':
    from torchvision.transforms import Compose, Resize, CenterCrop, ToTensor, Normalize
    transform = Compose([
        Resize(64),
        CenterCrop(64),
        ToTensor(),
        Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])
    root = '/home/zsh_o/work/data/faces/'
    dataset = Faces(root=root, transform=transform)
    print(dataset[0])
Example #13
    def __call__(self, input, target):
        # do something to both images
        input = Resize(self.height, Image.BILINEAR)(input)
        target = Resize(self.height, Image.NEAREST)(target)

        if (self.augment):
            # Random hflip
            hflip = random.random()
            if (hflip < 0.5):
                input = input.transpose(Image.FLIP_LEFT_RIGHT)
                target = target.transpose(Image.FLIP_LEFT_RIGHT)

            # Random translation 0-2 pixels (fill rest with padding)
            transX = random.randint(-2, 2)
            transY = random.randint(-2, 2)

            input = ImageOps.expand(input,
                                    border=(transX, transY, 0, 0),
                                    fill=0)
            target = ImageOps.expand(target,
                                     border=(transX, transY, 0, 0),
                                     fill=255)  #pad label filling with 255
            input = input.crop(
                (0, 0, input.size[0] - transX, input.size[1] - transY))
            target = target.crop(
                (0, 0, target.size[0] - transX, target.size[1] - transY))

        input = ToTensor()(input)
        if (self.enc):
            target = Resize(int(self.height / 8), Image.NEAREST)(target)
        target = ToLabel()(target)
        target = Relabel(255, 19)(target)

        return input, target
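A joint transform like this is normally consumed from a segmentation dataset's __getitem__; the sketch below is a hypothetical wrapper (class name and file lists invented for illustration) around an instance of the class defining __call__ above.

from PIL import Image
from torch.utils.data import Dataset

class SegmentationPairs(Dataset):
    """Hypothetical dataset applying a joint image/label transform such as the one above."""

    def __init__(self, image_files, label_files, co_transform):
        self.image_files = image_files
        self.label_files = label_files
        self.co_transform = co_transform  # instance of the co-transform class shown above

    def __len__(self):
        return len(self.image_files)

    def __getitem__(self, index):
        image = Image.open(self.image_files[index]).convert('RGB')
        label = Image.open(self.label_files[index]).convert('L')
        # returns (input tensor, relabelled target tensor)
        return self.co_transform(image, label)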
Example #14
def main():
    # See all possible arguments in src/transformers/training_args.py
    # or by passing the --help flag to this script.
    # We now keep distinct sets of args, for a cleaner separation of concerns.

    parser = HfArgumentParser((ModelArguments, DataTrainingArguments, TrainingArguments))
    if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
        # If we pass only one argument to the script and it's the path to a json file,
        # let's parse it to get our arguments.
        model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1]))
    else:
        model_args, data_args, training_args = parser.parse_args_into_dataclasses()

    # Setup logging
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        handlers=[logging.StreamHandler(sys.stdout)],
    )

    log_level = training_args.get_process_log_level()
    logger.setLevel(log_level)
    transformers.utils.logging.set_verbosity(log_level)
    transformers.utils.logging.enable_default_handler()
    transformers.utils.logging.enable_explicit_format()

    # Log on each process the small summary:
    logger.warning(
        f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}"
        + f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}"
    )
    logger.info(f"Training/evaluation parameters {training_args}")

    # Detecting last checkpoint.
    last_checkpoint = None
    if os.path.isdir(training_args.output_dir) and training_args.do_train and not training_args.overwrite_output_dir:
        last_checkpoint = get_last_checkpoint(training_args.output_dir)
        if last_checkpoint is None and len(os.listdir(training_args.output_dir)) > 0:
            raise ValueError(
                f"Output directory ({training_args.output_dir}) already exists and is not empty. "
                "Use --overwrite_output_dir to overcome."
            )
        elif last_checkpoint is not None and training_args.resume_from_checkpoint is None:
            logger.info(
                f"Checkpoint detected, resuming training at {last_checkpoint}. To avoid this behavior, change "
                "the `--output_dir` or add `--overwrite_output_dir` to train from scratch."
            )

    # Initialize our dataset and prepare it for the 'image-classification' task.
    ds = load_dataset(
        data_args.dataset_name,
        data_args.dataset_config_name,
        data_files=data_args.data_files,
        cache_dir=model_args.cache_dir,
        task="image-classification",
    )

    # Define torchvision transforms to be applied to each image.
    normalize = Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
    _train_transforms = Compose(
        [
            RandomResizedCrop(data_args.image_size),
            RandomHorizontalFlip(),
            ToTensor(),
            normalize,
        ]
    )
    _val_transforms = Compose(
        [
            Resize(data_args.image_size),
            CenterCrop(data_args.image_size),
            ToTensor(),
            normalize,
        ]
    )

    def train_transforms(example_batch):
        """Apply _train_transforms across a batch."""
        example_batch["pixel_values"] = [_train_transforms(pil_loader(f)) for f in example_batch["image_file_path"]]
        return example_batch

    def val_transforms(example_batch):
        """Apply _val_transforms across a batch."""
        example_batch["pixel_values"] = [_val_transforms(pil_loader(f)) for f in example_batch["image_file_path"]]
        return example_batch

    # If we don't have a validation split, split off a percentage of train as validation.
    data_args.train_val_split = None if "validation" in ds.keys() else data_args.train_val_split
    if isinstance(data_args.train_val_split, float) and data_args.train_val_split > 0.0:
        split = ds["train"].train_test_split(data_args.train_val_split)
        ds["train"] = split["train"]
        ds["validation"] = split["test"]

    # Prepare label mappings.
    # We'll include these in the model's config to get human readable labels in the Inference API.
    labels = ds["train"].features["labels"].names
    label2id, id2label = dict(), dict()
    for i, label in enumerate(labels):
        label2id[label] = str(i)
        id2label[str(i)] = label

    # Load the accuracy metric from the datasets package
    metric = datasets.load_metric("accuracy")

    # Define our compute_metrics function. It takes an `EvalPrediction` object (a namedtuple with a
    # predictions and label_ids field) and has to return a dictionary string to float.
    def compute_metrics(p):
        """Computes accuracy on a batch of predictions"""
        return metric.compute(predictions=np.argmax(p.predictions, axis=1), references=p.label_ids)

    config = AutoConfig.from_pretrained(
        model_args.config_name or model_args.model_name_or_path,
        num_labels=len(labels),
        label2id=label2id,
        id2label=id2label,
        finetuning_task="image-classification",
        cache_dir=model_args.cache_dir,
        revision=model_args.model_revision,
        use_auth_token=True if model_args.use_auth_token else None,
    )
    model = AutoModelForImageClassification.from_pretrained(
        model_args.model_name_or_path,
        from_tf=bool(".ckpt" in model_args.model_name_or_path),
        config=config,
        cache_dir=model_args.cache_dir,
        revision=model_args.model_revision,
        use_auth_token=True if model_args.use_auth_token else None,
    )
    # NOTE - We aren't directly using this feature extractor since we defined custom transforms above.
    # We initialize this instance below and pass it to Trainer to ensure that the feature extraction
    # config, preprocessor_config.json, is included in output directories.
    # This way if we push a model to the hub, the inference widget will work.
    feature_extractor = AutoFeatureExtractor.from_pretrained(
        model_args.feature_extractor_name or model_args.model_name_or_path,
        cache_dir=model_args.cache_dir,
        revision=model_args.model_revision,
        use_auth_token=True if model_args.use_auth_token else None,
        size=data_args.image_size,
        image_mean=normalize.mean,
        image_std=normalize.std,
    )

    if training_args.do_train:
        if "train" not in ds:
            raise ValueError("--do_train requires a train dataset")
        if data_args.max_train_samples is not None:
            ds["train"] = ds["train"].shuffle(seed=training_args.seed).select(range(data_args.max_train_samples))
        # Set the training transforms
        ds["train"].set_transform(train_transforms)

    if training_args.do_eval:
        if "validation" not in ds:
            raise ValueError("--do_eval requires a validation dataset")
        if data_args.max_eval_samples is not None:
            ds["validation"] = (
                ds["validation"].shuffle(seed=training_args.seed).select(range(data_args.max_eval_samples))
            )
        # Set the validation transforms
        ds["validation"].set_transform(val_transforms)

    # Initialize our trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=ds["train"] if training_args.do_train else None,
        eval_dataset=ds["validation"] if training_args.do_eval else None,
        compute_metrics=compute_metrics,
        tokenizer=feature_extractor,
        data_collator=collate_fn,
    )

    # Training
    if training_args.do_train:
        checkpoint = None
        if training_args.resume_from_checkpoint is not None:
            checkpoint = training_args.resume_from_checkpoint
        elif last_checkpoint is not None:
            checkpoint = last_checkpoint
        train_result = trainer.train(resume_from_checkpoint=checkpoint)
        trainer.save_model()
        trainer.log_metrics("train", train_result.metrics)
        trainer.save_metrics("train", train_result.metrics)
        trainer.save_state()

    # Evaluation
    if training_args.do_eval:
        metrics = trainer.evaluate()
        trainer.log_metrics("eval", metrics)
        trainer.save_metrics("eval", metrics)

    # Write model card and (optionally) push to hub
    kwargs = {
        "finetuned_from": model_args.model_name_or_path,
        "tasks": "image-classification",
        "dataset": data_args.dataset_name,
        "tags": ["image-classification"],
    }
    if training_args.push_to_hub:
        trainer.push_to_hub(**kwargs)
    else:
        trainer.create_model_card(**kwargs)
Example #15
def train_panet(device, resume=False, dataset_name='voc'):
    pre_trained_encoder_path = '../data/vgg16-397923af.pth' if cfg['panet'][
        'use_pretrained'] else None
    model = PANetFewShotSeg(in_channels=cfg[dataset_name]['channels'],
                            pretrained_path=pre_trained_encoder_path,
                            cfg={
                                'align': True
                            },
                            encoder_type=cfg['panet']['backbone']).to(device)
    optimizer = torch.optim.SGD(model.parameters(),
                                lr=cfg['panet']['lr'],
                                momentum=cfg['panet']['momentum'],
                                weight_decay=cfg['panet']['weight_decay'])
    scheduler = MultiStepLR(optimizer,
                            milestones=cfg['panet']['lr_milestones'],
                            gamma=0.1)
    epoch = 0
    model.train()

    if resume:
        epoch = load_state(cfg[dataset_name]['model_name'], model, optimizer,
                           scheduler)

    if dataset_name == 'voc':
        transforms = Compose([
            Resize(size=cfg['panet']['vgg_inp_size']),
        ])
    elif dataset_name == 'ircadb':
        transforms = Compose([
            Resize(size=cfg['panet']['unet_inp_size']),
        ])

    if dataset_name == 'voc':
        train_dataset = get_pascal_few_shot_datasets(
            range(1, 16), cfg['panet']['train_iterations'], cfg['nshot'],
            cfg['nquery'], transforms)
    elif dataset_name == 'ircadb':
        train_dataset = get_ircadb_few_shot_datasets(
            organs=[
                "bone", "spleen", "leftkidney", "rightkidney", "leftlung",
                "rightlung", "gallbladder"
            ],
            patient_ids=range(1, 16),
            iterations=cfg['panet']['train_iterations'],
            N_shot=cfg['nshot'],
            N_query=cfg['nquery'],
            transforms=transforms)

    trainloader = DataLoader(train_dataset,
                             batch_size=1,
                             shuffle=True,
                             num_workers=1,
                             pin_memory=True,
                             drop_last=True)

    criterion = nn.CrossEntropyLoss(ignore_index=255)

    log_loss = {'loss': 0, 'align_loss': 0}
    for i_iter, (support, query) in enumerate(tqdm(trainloader)):

        support_images = [[]]
        support_fg_mask = [[]]
        support_bg_mask = [[]]
        for i in range(len(support)):
            # print(support[i][0].shape)
            support_images[0].append(support[i][0].to(device))
            support_fg_mask[0].append(support[i][1].to(device))
            support_bg_mask[0].append(support[i][2].to(device))

        query_images = []
        query_labels = []

        for i in range(len(query)):
            query_images.append(query[i][0].to(device))
            query_labels.append(query[i][1].to(device))

        query_labels = torch.cat(query_labels, dim=0).long().to(device)

        # Forward and Backward
        optimizer.zero_grad()
        query_pred, align_loss = model(support_images, support_fg_mask,
                                       support_bg_mask, query_images)
        query_loss = criterion(query_pred, query_labels)
        loss = query_loss + align_loss * cfg['panet']['align_loss_scalar']
        loss.backward()
        optimizer.step()
        scheduler.step()

        # Log loss
        query_loss = query_loss.detach().data.cpu().numpy()
        align_loss = align_loss.detach().data.cpu().numpy(
        ) if align_loss != 0 else 0
        log_loss['loss'] += query_loss
        log_loss['align_loss'] += align_loss

        # print loss and take snapshots
        if (i_iter + 1) % cfg['panet']['save_period'] == 0:
            loss = log_loss['loss'] / (i_iter + 1)
            align_loss = log_loss['align_loss'] / (i_iter + 1)
            print('\nstep {}: loss: {}, align_loss: {}'.format(
                i_iter + 1, loss, align_loss))
        if (i_iter + 1) % cfg['panet']['save_period'] == 0:
            save_state(cfg[dataset_name]['model_name'], model, optimizer,
                       scheduler, epoch + i_iter + 1)
            print("\nModel Saved On Iteration {} ...".format(epoch + i_iter +
                                                             1))

    return model
Example #16
def train_lr_transform(crop_size, upscale_factor):
    return Compose([
        ToPILImage(),
        Resize(crop_size // upscale_factor, interpolation=Image.BICUBIC),
        ToTensor()
    ])
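The matching high-resolution transform is not shown in this snippet, so the definition below is an assumption: it crops a random HR patch as a tensor, which train_lr_transform then downsamples.

from torchvision.transforms import Compose, RandomCrop, ToTensor

def train_hr_transform(crop_size):
    # Assumed counterpart: random HR patch, returned as a tensor
    return Compose([RandomCrop(crop_size), ToTensor()])

# hr_patch = train_hr_transform(88)(hr_image)     # (3, 88, 88) tensor from a PIL image
# lr_patch = train_lr_transform(88, 4)(hr_patch)  # (3, 22, 22) bicubic-downsampled tensor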
# Losses
reconstruction_loss = None
if ARGUMENTS.loss == 'l1':
    reconstruction_loss = nn.L1Loss().to(device)
elif ARGUMENTS.loss == 'lpips':
    reconstruction_loss = LPIPS(device).to(device)

envmap_loss = None
if envmap_colorspace == 'rgb':
    envmap_loss = log_l2_loss
else:
    envmap_loss = hsv_envmap_loss

# Configure data sets
transform = Resize(SIZE)
pairing_strategies = [DifferentScene(), DifferentLightDirection()]
train_dataset = InputTargetGroundtruthWithGeneratedEnvmapDataset(
    transform=transform,
    pairing_strategies=pairing_strategies,
    mode=envmap_colorspace)
test_dataset = InputTargetGroundtruthWithGeneratedEnvmapDataset(
    data_path=VALIDATION_DATA_PATH,
    transform=transform,
    pairing_strategies=pairing_strategies,
    mode=envmap_colorspace)

# Configure data loaders
# Sub-sampling:
# https://discuss.pytorch.org/t/train-on-a-fraction-of-the-data-set/16743/2
# https://discuss.pytorch.org/t/torch-equivalent-of-numpy-random-choice/16146/5
Example #18
def valid_hr_transform(shape, upscale_factor):
    return Compose([
        ToTensor(),
        Resize((shape[0] // upscale_factor, shape[1] // upscale_factor),
               interpolation=Image.BICUBIC)
    ])
Example #19
def main():

    args = arguments()

    if torch.cuda.is_available():
        device = torch.device("cuda:0")  # Can continue going on here, like cuda:1 cuda:2....etc.
        print("Running on the GPU")
    else:
        device = torch.device("cpu")
        print("Running on the CPU")

    transforms = Compose([Resize((50, 50)), ToTensor()])
    dataset = ImageFolder("Data_WetSeason", transform=transforms)
    testset = ImageFolder("Test_WetSeason", transform=transforms)
    INPUT_SIZE = dataset[0][0].shape
    """
    train_val_len = int(0.9 * len(dataset))
    test_len = int(len(dataset) - train_val_len)
    train_len = int(0.8 * 0.9 * len(dataset))
    val_len = int(len(dataset) - test_len - train_len)
    """
    """train_len = int(0.7 * len(dataset))
    val_len = int(0.1 * len(dataset))
    test_len = int(len(dataset) - train_len - val_len)
    # train, test = random_split(dataset, lengths=(train_len, test_len))
    train, validation, test = random_split(dataset, lengths=(train_len, val_len, test_len))
    train_loader = DataLoader(train, batch_size=TRAIN_BATCH_SIZE, shuffle=True)
    val_loader = DataLoader(validation, batch_size=VAL_BATCH_SIZE, shuffle=False)
    test_loader = DataLoader(test, batch_size=TEST_BATCH_SIZE, shuffle=False)
    # prediction_loader = DataLoader(dataset, batch_size=PRED_BATCH_SIZE)
    """
    train_len = int(0.8 * len(dataset))
    val_len = int(len(dataset) - train_len)
    train, val = random_split(dataset, lengths=(train_len, val_len))
    train_loader = DataLoader(train, batch_size=args.train_batch_size, shuffle=True)
    val_loader = DataLoader(val, batch_size=args.val_batch_size, shuffle=False)
    prediction_loader = DataLoader(testset, batch_size=args.pred_batch_size)

    net = Net(INPUT_SIZE).to(device)
    optimizer = optim.Adam(net.parameters(), lr=0.001)
    loss_function = nn.CrossEntropyLoss()

    # with open("CNN_model.log", "a") as f:
    for epoch in range(args.epochs):
        net.train()
        sum_acc = 0
        for x, y in train_loader:
            x = x.to(device)
            y = y.to(device)
            acc, loss = step(x, y, net=net, optimizer=optimizer, loss_function=loss_function, train=True)
            sum_acc += acc
        train_avg_acc = sum_acc / len(train_loader)
        print(f"Training accuracy: {train_avg_acc:.2f}")

        net.eval()
        sum_acc = 0
        for x, y in val_loader:
            x = x.to(device)
            y = y.to(device)
            val_acc, val_loss = step(x, y, net=net, optimizer=optimizer, loss_function=loss_function, train=False)  # no parameter updates during validation
            sum_acc += val_acc
        val_avg_acc = sum_acc / len(val_loader)

        print(f"Validation accuracy: {val_avg_acc:.2f}")
        train_steps = len(train_loader) * (epoch + 1)
        wandb.log({"Train Accuracy": train_avg_acc, "Validation Accuracy": val_avg_acc}, step=train_steps)

    # train_preds = get_all_preds(net, test_loader)
    train_preds = get_all_preds(net, loader=prediction_loader, device=device)
    plt.figure(figsize=(10, 10))
    wandb.sklearn.plot_confusion_matrix(testset.targets, train_preds.argmax(dim=1), LABELS)
    precision, recall, f1_score, support = score(testset.targets, train_preds.argmax(dim=1))
    test_acc = accuracy_score(testset.targets, train_preds.argmax(dim=1))

    print(f"Test Accuracy: {test_acc}")
    print('precision: {}'.format(precision))
    print('recall: {}'.format(recall))
    print('f1_score: {}'.format(f1_score))
    print('support: {}'.format(support))
Example #20
    def __init__(self, base_path,data_args,data_split, sample_rate, max_sample_size=None, min_sample_size=None,
                 shuffle=True):
        super().__init__()

        self.data_args=data_args

        self.sample_rate = sample_rate

        self.fnames_audio = []
        self.fnames_text = []
        self.fnames_video = []

        self.sizes_audio = []
        self.sizes_video = []

        self.labels = {}

        #####Video Frame #####
        self.channels = 3
        self.timeDepth = 300
        self.xSize = 256
        self.ySize = 256

        IMAGE_SIZE=(self.xSize,self.ySize)
        self.transform = Compose([Resize(IMAGE_SIZE), ToTensor()])


        self.max_sample_size = max_sample_size if max_sample_size is not None else sys.maxsize
        self.min_sample_size = min_sample_size if min_sample_size is not None else self.max_sample_size
        self.base_manifest_path = base_path
        self.split = data_split

        if self.data_args.binary_target_iemocap: 
            included_emotions = ['neu','ang','sad','hap'] # 'exc', IEMOCAP  (Max 5 emotions (only take 4 in prior work))
        
        elif self.data_args.softmax_target_meld:

            print("We are using MELD for the softmax classification")

            included_emotions = ['neutral','sadness','surprise','joy','anger','fear','disgust'] #MELD (Max 7 emotion)
            #included_emotions = ['neutral','sadness','surprise','joy','anger']

        elif self.data_args.softmax_target_binary_meld:

            included_emotions = ['neutral','sadness','surprise','joy','anger','fear','disgust'] #MELD (Max 7 emotion)


        else:
            print("We are using MOSEI or MOSI to do a regression task")

        

        manifest_audio = os.path.join(self.base_manifest_path, '{}.tsv'.format(self.split+"_a"))
        manifest_text = os.path.join(self.base_manifest_path, '{}.tsv'.format(self.split+"_t"))
        manifest_video = os.path.join(self.base_manifest_path, '{}.tsv'.format(self.split+"_v"))

        manifest_label = os.path.join(self.base_manifest_path, '{}.csv'.format("label_file_"+self.split))

        with open(manifest_label, 'r') as f_l:
            self.root_dir_l = f_l.readline().strip()
            for line_l in f_l:

                items_l = line_l.strip().split(',')

                if self.data_args.regression_target_mos:                
                    self.labels[items_l[0].strip()] = np.round(float(items_l[1].strip()),decimals=4)
                else:
                    self.labels[items_l[0].strip()] = items_l[1].strip() #for the sentiment use 2 from the list else 1


        #inter_n=0
        with open(manifest_audio, 'r') as f_a, open(manifest_text, 'r') as f_t, open(manifest_video, 'r') as f_v :#, open(manifest_label, 'r') as f_l:
            self.root_dir_a = f_a.readline().strip()
            self.root_dir_t = f_t.readline().strip()
            self.root_dir_v = f_v.readline().strip()


            for line_a, line_t, line_v in zip(f_a,f_t,f_v):#,f_l):, line_l
           
                items_a = line_a.strip().split('\t')
                items_t = line_t.strip().split('\t')
                items_v = line_v.strip().split('\t')

                # inter_n=inter_n+1

                # if inter_n>5:
                #     break

                assert items_a[0].split('.')[0] == items_t[0].split('.')[0] == items_v[0].split('.')[0], "misalignment of data"
        
                emotion = self.labels.get(items_v[0].split('.')[0])  # returns None if the label is missing

                if self.data_args.regression_target_mos:

                    if self.data_args.eval_matric:
                        if emotion==0.0:
                            continue  

                    self.fnames_audio.append(items_a[0])
                    self.fnames_text.append(items_t[0])
                    self.fnames_video.append(items_v[0])
                    self.sizes_audio.append(1000000)     #This is used in the data loader np.lexsort but can remove it
                    self.sizes_video.append(1000000)
                
                else:
        
                    if emotion in included_emotions: # Only using the subset of emotions


                        self.fnames_audio.append(items_a[0])
                        self.fnames_text.append(items_t[0])
                        self.fnames_video.append(items_v[0])

                        self.sizes_audio.append(1000000)  
                        self.sizes_video.append(1000000)

        if self.data_args.binary_target_iemocap:

            self.emotion_dictionary = {  # IEMOCAP
                'neu':0,
                'ang':2,
                'hap':3,
                'sad':1,
                #'exc':3
            }

        if self.data_args.softmax_target_meld: 

            self.emotion_dictionary = { #MELD
                'anger'  : 2,
                'joy':     3,
                'neutral': 0,
                'sadness': 1,
                'surprise':4,
                'fear':5,
                'disgust':6
            }

            # self.emotion_dictionary = { #MELD
            #     'anger'  : 2,
            #     'joy':     3,
            #     'neutral': 0,
            #     'sadness': 1,
            #     'surprise':4,
            #     #'fear':5,
            #     #'disgust':6
            # }

        if self.data_args.regression_target_mos:

            self.emotion_dictionary = {  # MOSEI sentiment
                '-3'  : 6,
                '-2':     5,
                '-1': 4,
                '0': 0,
                '1':1,
                '2':2,
                '3':3
            }

            # self.emotion_dictionary = {   #modei senti 2 class
            
            #     '0': 0,
            #     '1':1
            # }
   
        self.shuffle = shuffle
Example #21
    def __call__(self, input, target):
        # do something to both images
        input = Scale(self.height, Image.BILINEAR)(input)
        target = Scale(self.height, Image.NEAREST)(target)

        if (self.augment):
            # Random hflip
            hflip = random.random()
            if (hflip < 0.5):
                input = input.transpose(Image.FLIP_LEFT_RIGHT)
                target = target.transpose(Image.FLIP_LEFT_RIGHT)

            degree = random.randint(-20, 20)
            input = input.rotate(degree, resample=Image.BILINEAR, expand=True)
            target = target.rotate(degree, resample=Image.NEAREST, expand=True)

            w, h = input.size
            nratio = random.uniform(0.5, 1.0)
            ni = random.randint(0, int(h - nratio * h))
            nj = random.randint(0, int(w - nratio * w))
            input = input.crop(
                (nj, ni, int(nj + nratio * w), int(ni + nratio * h)))
            target = target.crop(
                (nj, ni, int(nj + nratio * w), int(ni + nratio * h)))
            input = Resize((480, 640), Image.BILINEAR)(input)
            target = Resize((480, 640), Image.NEAREST)(target)

            brightness_factor = random.uniform(0.8, 1.2)
            contrast_factor = random.uniform(0.8, 1.2)
            saturation_factor = random.uniform(0.8, 1.2)
            #sharpness_factor=random.uniform(0.0,2.0)
            hue_factor = random.uniform(-0.2, 0.2)

            enhancer1 = ImageEnhance.Brightness(input)
            input = enhancer1.enhance(brightness_factor)

            enhancer2 = ImageEnhance.Contrast(input)
            input = enhancer2.enhance(contrast_factor)

            enhancer3 = ImageEnhance.Color(input)
            input = enhancer3.enhance(saturation_factor)

            #enhancer4=ImageEnhance.Sharpness(input)
            #input=enhancer4.enhance(sharpness_factor)

            input_mode = input.mode
            h, s, v = input.convert('HSV').split()
            np_h = np.array(h, dtype=np.uint8)
            with np.errstate(over='ignore'):
                np_h += np.uint8(hue_factor * 255)
            h = Image.fromarray(np_h, 'L')
            input = Image.merge('HSV', (h, s, v)).convert(input_mode)

        else:
            input = Resize((480, 640), Image.BILINEAR)(input)
            target = Resize((480, 640), Image.NEAREST)(target)

        input = ToTensor()(input)

        if (self.enc):
            target = Resize((60, 80), Image.NEAREST)(target)
        target = ToLabel()(target)
        target = Relabel(255, 27)(target)

        return input, target
Example #22
    if torch.cuda.is_available():
        print("CUDA available")
    else:
        print("CUDA not available")

    device = torch.device("cuda:0" if cuda_available else "cpu")

    # DATA
    SPLIT_RATIO: float = 0.7
    BATCH_SIZE: int = 32
    NUM_EPOCHS: int = 15

    movie_data_set: MovieSuccessDataset = MovieSuccessDataset(MOVIE_DATA_FILE,
                                                              POSTERS_DIR,
                                                              Dictionary(DATA_DIR / 'dict2000.json'),
                                                              Compose([Resize((299,
                                                                               299)),
                                                                       ToTensor()]))

    data_set_size: int = len(movie_data_set)
    print(f'Size of the data-set: {data_set_size}')

    train_data_set_size: int = int(data_set_size * SPLIT_RATIO)
    val_data_set_size: int = data_set_size - train_data_set_size
    train_dataset, val_dataset = torch.utils.data.random_split(movie_data_set, [train_data_set_size,
                                                                                val_data_set_size])

    weights: np.ndarray = get_class_weights(train_dataset)

    weighted_sampler = torch.utils.data.sampler.WeightedRandomSampler(weights, len(weights))
    train_data_set_loader: DataLoader = DataLoader(train_dataset,
                                                   batch_size=BATCH_SIZE,
    if opts.model == 'siamese':
        model = siamese(opts.input_channels)
    elif opts.model == 'cnn_pairwise':
        model = CnnPairwise(opts.input_channels)
    elif opts.model == 'CRFN':
        model = CRFN(opts.input_channels)

    model = model.to(device)

    contrastive = ContrastiveLoss(margin=opts.margin)
    CE = torch.nn.BCEWithLogitsLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=opts.lr)
    optimizer.zero_grad()

    transform = Compose([Resize(105), ToTensor()])

    train_pairs = PairedImagesDataset(data_path=opts.train_data,
                                      size=40000,
                                      transform=transform,
                                      enable_fake_pairs=opts.enable_fake_pairs)
    val_pairs = PairedImagesDataset(data_path=opts.val_data,
                                    size=10000,
                                    transform=transform,
                                    enable_fake_pairs=opts.enable_fake_pairs)
    train_pairs_loader = DataLoader(dataset=train_pairs,
                                    batch_size=opts.batch_size,
                                    shuffle=True)

    val_pairs_loader = DataLoader(dataset=val_pairs,
                                  batch_size=opts.batch_size,
    def __init__(self):
        self.args = args

        self.input_transform = Compose([
            Resize((512, 512)),
            ToTensor(),  # Resize((resize, resize)),
            Normalize([.485, .456, .406], [.229, .224, .225])
        ])
        self.label_transform = Compose(
            [Resize((512, 512)), ToLabel(),
             Relabel()])

        # self.net = model().cuda()
        # switched to this model for the correlation variant
        self.net = crate_Den_Resnet_model().cuda()
        # self.net = UnetResNetaddbn().cuda()
        # self.net = UnetResNet_dropout().cuda()
        # self.net = model_siamese_addbn_advance().cuda()
        # self.net = model_siamese_addbn_pooling_sort().cuda()
        # self.net = model_siamese_addbn_advance_pooling().cuda()
        # self.net = model_siamese_addbn_w_x_pooling().cuda()
        # checkpoint = torch.load(self.args.model_path)
        # self.net.load_state_dict(checkpoint,strict=True)
        #
        # self.net = torch.load(self.args.model_path).cuda()

        # checkpoint = torch.load('/home/gongxp/mlmr/githubcode/siamase_pytorch/resnet50_origin.pth')
        # self.net.load_state_dict(checkpoint, strict=False)
        self.train_data_loader = DataLoader(coseg_train_dataset(
            self.args.train_data, self.args.train_label, self.args.train_txt,
            self.input_transform, self.label_transform),
                                            num_workers=self.args.num_worker,
                                            batch_size=self.args.batch_size,
                                            shuffle=True)
        self.val_data_loader = DataLoader(coseg_val_dataset(
            self.args.val_data, self.args.val_label, self.args.val_txt,
            self.input_transform, self.label_transform),
                                          num_workers=self.args.num_worker,
                                          batch_size=self.args.batch_size,
                                          shuffle=False)
        self.optimizer = optim.Adam(self.net.parameters(),
                                    lr=self.args.lr,
                                    weight_decay=self.args.weight_decay)
        # self.optimizer = optim.SGD(self.net.parameters(), lr=self.args.lr, momentum=0.9, nesterov=True,weight_decay=self.args.weight_decay)
        #
        self.steps_per_epoch = int(
            np.ceil(
                get_file_len(self.args.train_txt) /
                float(self.args.batch_size)))
        self.scheduler = torch.optim.lr_scheduler.StepLR(
            self.optimizer, step_size=self.steps_per_epoch * 2, gamma=0.75)

        self.loss_func = nn.CrossEntropyLoss()
        self.focal_loss = FocalLoss2d()  # error

        self.BCEsoftJaccarddice = BCESoftJaccardDice()  # error
        self.BCESoftJaccarddice_rate_change = BCESoftJaccardDiceRateChange()  # error
        # self.dice_loss = DiceLoss()  #
        summary(self.net, [(3, 512, 512), (3, 512, 512)])
Example #25
    def __init__(self, model, emotions, img_size=(224, 224), device=None):
        self.emotions = emotions
        self.transform = Compose([Resize(img_size), Grayscale(1), ToTensor()])
        self.device = torch.device('cpu') if device is None else device
        self.model = model.eval().to(self.device)
    gallery_labels = torch.cat(gallery_labels, dim=0).numpy()
    gallery_cams = torch.cat(gallery_cams, dim=0).numpy()
    Cmc, mAP = Video_Cmc(gallery_features, gallery_labels, gallery_cams,
                         dataloader.dataset.query_idx, 10000)
    network.train()

    return Cmc[0:20], mAP


if __name__ == '__main__':
    #Parse args
    args = parser.parse_args()

    # set transformation (H flip is inside dataset)
    train_transform = Compose([
        Resize((256, 128)),
        ToTensor(),
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
    test_transform = Compose([
        Resize((256, 128)),
        ToTensor(),
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
    print('Start dataloader...')
    train_dataloader = utils.Get_Video_train_DataLoader(args.train_txt,args.train_info, train_transform, shuffle=True,num_workers=args.num_workers,\
                                                        S=args.S,track_per_class=args.track_per_class,class_per_batch=args.class_per_batch)
    num_class = train_dataloader.dataset.n_id
    test_dataloader = utils.Get_Video_test_DataLoader(args.test_txt,args.test_info,args.query_info,test_transform,batch_size=args.batch_size,\
                                                 shuffle=False,num_workers=args.num_workers,S=args.S,distractor=True)
    print('End dataloader... n_id', num_class)
Example #27
        self.transforms = transforms

    def __call__(self, img, seed, mask_flag=False):
        for t in self.transforms:
            if isinstance(t, ColorJitter) and mask_flag:  # never color-jitter the mask
                continue
            random.seed(seed)
            img = t(img)
        return img

train_img_aug = Compose_own([
        RandomAffine(90, shear=45),
        RandomRotation(90),
        RandomHorizontalFlip(),
        ColorJitter(),
        Resize(size=(img_size, img_size)),
        ToTensor()])

train_mask_aug = Compose_own([Resize(size=(img_size, img_size)),
        ToTensor()])


def __getitem__(self, index):
    img = Image.open(self.data[index]).convert('RGB')
    target = Image.open(self.data_labels[index])

    seed = np.random.randint(1000000)  # make a seed with numpy generator
    random.seed(seed)  # apply this seed to image transforms
    if self.transform is not None:
        img = self.transform(img)
Example #28
def test_panet(device, model=None, dataset_name='voc', test_organ='liver'):
    if model is None:
        # pretrained_path='../data/vgg16-397923af.pth'
        model = PANetFewShotSeg(
            in_channels=cfg[dataset_name]['channels'],
            pretrained_path=None,
            cfg={
                'align': True
            },
            encoder_type=cfg['panet']['backbone']).to(device)
        load_state(cfg[dataset_name]['model_name'], model)

    model.eval()

    if dataset_name == 'voc':
        transforms = Compose([
            Resize(size=cfg['panet']['vgg_inp_size']),
        ])
    elif dataset_name == 'ircadb':
        transforms = Compose([
            Resize(size=cfg['panet']['unet_inp_size']),
        ])

    if dataset_name == 'voc':
        test_dataset = get_pascal_few_shot_datasets(
            range(16, 21), cfg['panet']['test_iterations'], cfg['nshot'],
            cfg['nquery'], transforms)
    elif dataset_name == 'ircadb':
        test_dataset = get_ircadb_few_shot_datasets(
            organs=[test_organ],
            patient_ids=range(16, 21),
            iterations=cfg['panet']['test_iterations'],
            N_shot=cfg['nshot'],
            N_query=cfg['nquery'],
            transforms=transforms)

    testloader = DataLoader(test_dataset,
                            batch_size=1,
                            shuffle=True,
                            num_workers=1,
                            pin_memory=True,
                            drop_last=True)

    metric = Metric(max_label=20, n_runs=1)
    for i_iter, (support, query) in enumerate(testloader):
        support_images = [[]]
        support_fg_mask = [[]]
        support_bg_mask = [[]]
        for i in range(len(support)):
            support_images[0].append(support[i][0].to(device))
            support_fg_mask[0].append(support[i][1].to(device))
            support_bg_mask[0].append(support[i][2].to(device))

        query_images = []
        query_labels = []

        for i in range(len(query)):
            query_images.append(query[i][0].to(device))
            query_labels.append(query[i][1].to(device))

        query_labels = torch.cat(query_labels, dim=0).long().to(device)

        query_pred, _ = model(support_images, support_fg_mask, support_bg_mask,
                              query_images)

        print("Support ", i_iter)
        for i in range(len(support)):
            plt.subplot(1, 2 * len(support), 2 * i + 1)
            try:
                plt.imshow(
                    np.moveaxis(support[i][0].squeeze().cpu().detach().numpy(),
                                0, 2))
            except np.AxisError:
                plt.imshow(support[i][0].squeeze().cpu().detach().numpy())
            plt.subplot(1, 2 * len(support), 2 * i + 2)
            plt.imshow(support[i][1].squeeze())
        plt.show()

        print("Query ", i_iter)

        for i in range(len(query)):
            plt.subplot(1, 3 * len(query), 3 * i + 1)
            try:
                plt.imshow(
                    np.moveaxis(query[i][0].squeeze().cpu().detach().numpy(),
                                0, 2))
            except np.AxisError:
                plt.imshow(query[i][0].squeeze().cpu().detach().numpy())
            plt.subplot(1, 3 * len(query), 3 * i + 2)
            plt.imshow(query[i][1].squeeze())
            plt.subplot(1, 3 * len(query), 3 * i + 3)
            plt.imshow(np.array(query_pred.argmax(dim=1)[i].cpu()))
            metric.record(np.array(query_pred.argmax(dim=1)[i].cpu()),
                          np.array(query_labels[i].cpu()),
                          n_run=0)
        plt.show()

    classIoU, meanIoU = metric.get_mIoU(n_run=0)
    classIoU_binary, meanIoU_binary = metric.get_mIoU_binary(n_run=0)

    print('classIoU', classIoU.tolist())
    print('meanIoU', meanIoU.tolist())
    print('classIoU_binary', classIoU_binary.tolist())
    print('meanIoU_binary', meanIoU_binary.tolist())
    print('classIoU: {}'.format(classIoU))
    print('meanIoU: {}'.format(meanIoU))
    print('classIoU_binary: {}'.format(classIoU_binary))
    print('meanIoU_binary: {}'.format(meanIoU_binary))
def build_transforms(height,
                     width,
                     transforms='random_flip',
                     norm_mean=[0.485, 0.456, 0.406],
                     norm_std=[0.229, 0.224, 0.225],
                     **kwargs):
    """Builds train and test transform functions.

    Args:
        height (int): target image height.
        width (int): target image width.
        transforms (str or list of str, optional): transformations applied to model training.
            Default is 'random_flip'.
        norm_mean (list or None, optional): normalization mean values. Default is ImageNet means.
        norm_std (list or None, optional): normalization standard deviation values. Default is
            ImageNet standard deviation values.
    """
    if transforms is None:
        transforms = []

    if isinstance(transforms, str):
        transforms = [transforms]

    if not isinstance(transforms, list):
        raise ValueError(
            'transforms must be a list of strings, but found to be {}'.format(
                type(transforms)))

    if len(transforms) > 0:
        transforms = [t.lower() for t in transforms]

    if norm_mean is None or norm_std is None:
        norm_mean = [0.485, 0.456, 0.406]  # imagenet mean
        norm_std = [0.229, 0.224, 0.225]  # imagenet std
    normalize = Normalize(mean=norm_mean, std=norm_std)

    print('Building train transforms ...')
    transform_tr = []

    print('+ resize to {}x{}'.format(height, width))
    transform_tr += [Resize((height, width))]

    if 'random_flip' in transforms:
        print('+ random flip')
        transform_tr += [RandomHorizontalFlip()]

    if 'random_crop' in transforms:
        print('+ random crop (enlarge to {}x{} and '
              'crop {}x{})'.format(int(round(height * 1.125)),
                                   int(round(width * 1.125)), height, width))
        transform_tr += [Random2DTranslation(height, width)]

    if 'random_patch' in transforms:
        print('+ random patch')
        transform_tr += [RandomPatch()]

    if 'color_jitter' in transforms:
        print('+ color jitter')
        transform_tr += [
            ColorJitter(brightness=0.2, contrast=0.15, saturation=0, hue=0)
        ]

    print('+ to torch tensor of range [0, 1]')
    transform_tr += [ToTensor()]

    print('+ normalization (mean={}, std={})'.format(norm_mean, norm_std))
    transform_tr += [normalize]

    if 'random_erase' in transforms:
        print('+ random erase')
        transform_tr += [RandomErasing(mean=norm_mean)]

    transform_tr = Compose(transform_tr)

    print('Building test transforms ...')
    print('+ resize to {}x{}'.format(height, width))
    print('+ to torch tensor of range [0, 1]')
    print('+ normalization (mean={}, std={})'.format(norm_mean, norm_std))

    transform_te = Compose([
        Resize((height, width)),
        ToTensor(),
        normalize,
    ])

    return transform_tr, transform_te
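A short usage sketch for the factory above; the target size, transform list, and image path are arbitrary illustrative choices.

from PIL import Image

transform_tr, transform_te = build_transforms(
    height=256, width=128, transforms=['random_flip', 'color_jitter'])

img = Image.open('person.jpg').convert('RGB')  # placeholder path
x_train = transform_tr(img)  # augmented, normalized (3, 256, 128) tensor
x_test = transform_te(img)   # deterministic resize + normalize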
Example #30
from PIL import Image

from torchvision.models import resnet101, resnet18, vgg16, alexnet
from torchvision.transforms import ToTensor, Resize, Compose

import matplotlib.pyplot as plt

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# create a model
model = resnet18(pretrained=True)

cat = Image.open("/home/francesco/Documents/mirror/cat.jpg")
# resize the image and make it a tensor
input = Compose([Resize((224, 224)), ToTensor()])(cat)
# add 1 dim for batch
input = input.unsqueeze(0)
# call mirror with the input and the model
layers = list(model.modules())
layer = layers[50]
print(layer)


def imshow(tensor):
    tensor = tensor.squeeze()
    img = tensor.permute(1, 2, 0).cpu().numpy()
    plt.imshow(img)
    plt.show()