Beispiel #1
0
        def dense_process_data(index):
            """Load a dense clip of frames for this record and transform it.

            NOTE(review): the ``index`` argument is unused; the frame pointers
            come from the enclosing scope's ``indices['dense']``.
            """
            images = list()
            for ind in indices['dense']:
                ptr = int(ind)

                # Clamp out-of-range pointers to the record's last frame.
                if ptr <= record.num_frames:
                    imgs = self._load_image(record.path, ptr)
                else:
                    imgs = self._load_image(record.path, record.num_frames)
                images.extend(imgs)

            if self.phase == 'Fntest':

                # Stack all frames along the channel axis before cropping.
                images = [np.asarray(im) for im in images]
                clip_input = np.concatenate(images, axis=2)

                self.t = transforms.Compose([
                    transforms.Resize(256)])
                clip_input = self.t(clip_input)

                normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                                 std=[0.229, 0.224, 0.225])

                # Pick a crop by position: 0 = center, 1/2 = corner variants.
                # NOTE(review): if record.crop_pos is outside {0, 1, 2}, the
                # previously assigned self.transform is silently reused below
                # — confirm this is intended.
                if record.crop_pos == 0:
                    self.transform = transforms.Compose([

                        transforms.CenterCrop((256, 256)),

                        transforms.ToTensor(),
                        normalize,
                    ])
                elif record.crop_pos == 1:
                    self.transform = transforms.Compose([

                        transforms.CornerCrop2((256, 256),),

                        transforms.ToTensor(),
                        normalize,
                    ])
                elif record.crop_pos == 2:
                    self.transform = transforms.Compose([
                        transforms.CornerCrop1((256, 256)),
                        transforms.ToTensor(),
                        normalize,
                    ])

                return self.transform(clip_input)

            # Non-Fntest phases reuse whatever transform was set previously.
            return self.transform(images)
Beispiel #2
0
def get_coco(root, image_set, transforms, mode='instances'):
    """Build a CocoDetection dataset for ``image_set`` ('train' or 'val').

    The caller-supplied ``transforms`` (if any) run after the mandatory
    ConvertCocoPolysToMask step.  Training images without annotations
    are filtered out.
    """
    anno_file_template = "{}_{}2017.json"
    split_paths = {
        "train": ("train2017",
                  os.path.join("annotations",
                               anno_file_template.format(mode, "train"))),
        "val": ("val2017",
                os.path.join("annotations",
                             anno_file_template.format(mode, "val"))),
    }

    # Mask conversion always runs first; user transforms are appended.
    pipeline = [ConvertCocoPolysToMask()]
    if transforms is not None:
        pipeline.append(transforms)
    transforms = T.Compose(pipeline)

    img_folder, ann_file = split_paths[image_set]
    dataset = CocoDetection(os.path.join(root, img_folder),
                            os.path.join(root, ann_file),
                            transforms=transforms)

    if image_set == "train":
        dataset = _coco_remove_images_without_annotations(dataset)

    return dataset
Beispiel #3
0
def make_coco_transforms(image_set):
    """Return the transform pipeline for a COCO split ('train' or 'val')."""
    normalize = T.Compose([T.ToTensor()])

    if image_set == 'train':
        # Training adds a random horizontal flip before tensor conversion.
        return T.Compose([T.RandomHorizontalFlip(0.5), normalize])
    if image_set == 'val':
        return T.Compose([normalize])

    raise ValueError(f'unknown {image_set}')
    def trans(is_training = True):
        """Build a basic pipeline: ToTensor, plus a random flip when training."""
        steps = [T.ToTensor()]
        if is_training:
            steps.append(T.RandomHorizontalFlip(0.5))
        return T.Compose(steps)
Beispiel #5
0
def main():
    """Train SFNet on the W-dataset and checkpoint the best-mIoU model.

    Reads CLI options from the module-level ``parser`` and tracks the best
    validation result in the global ``best_record``.
    """
    global args, best_record
    args = parser.parse_args()

    # Joint transforms so image and label get identical spatial operations.
    if args.augment:
        transform_train = joint_transforms.Compose([
            joint_transforms.RandomCrop(256),
            joint_transforms.Normalize(),
            joint_transforms.ToTensor(),
        ])
    else:
        transform_train = None

    dataset_train = Data.WData(args.data_root, transform_train)
    dataloader_train = data.DataLoader(dataset_train,
                                       batch_size=args.batch_size,
                                       shuffle=True,
                                       num_workers=16)

    # NOTE(review): validation reuses transform_train — with --augment the
    # val set is randomly cropped as well; confirm this is intended.
    dataset_val = Data.WData(args.val_root, transform_train)
    dataloader_val = data.DataLoader(dataset_val,
                                     batch_size=args.batch_size,
                                     shuffle=False,  # was shuffle=None; False is the explicit no-shuffle value
                                     num_workers=16)

    model = SFNet(input_channels=37, dilations=[2, 4, 8], num_class=2)

    # multi gpu
    model = torch.nn.DataParallel(model)

    print('Number of model parameters: {}'.format(
        sum([p.data.nelement() for p in model.parameters()])))

    model = model.cuda()
    cudnn.benchmark = True

    # define loss function (criterion) and optimizer
    criterion = torch.nn.CrossEntropyLoss(ignore_index=-1).cuda()
    # Backbone params get the base LR; head params get 10x the base LR.
    optimizer = torch.optim.SGD([{
        'params': get_1x_lr_params(model)
    }, {
        'params': get_10x_lr_params(model),
        'lr': 10 * args.learning_rate
    }],
                                lr=args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch)

        # train for one epoch
        train(dataloader_train, model, criterion, optimizer, epoch)

        # evaluate on validation set
        acc, mean_iou, val_loss = validate(dataloader_val, model, criterion,
                                           epoch)

        # Track the best validation mIoU seen so far.
        is_best = mean_iou > best_record['miou']
        if is_best:
            best_record['epoch'] = epoch
            best_record['val_loss'] = val_loss.avg
            best_record['acc'] = acc
            best_record['miou'] = mean_iou
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'val_loss': val_loss.avg,
                'accuracy': acc,
                'miou': mean_iou,
                'model': model,
            }, is_best)

        print(
            '------------------------------------------------------------------------------------------------------'
        )
        print('[epoch: %d], [val_loss: %5f], [acc: %.5f], [miou: %.5f]' %
              (epoch, val_loss.avg, acc, mean_iou))
        print(
            'best record: [epoch: {epoch}], [val_loss: {val_loss:.5f}], [acc: {acc:.5f}], [miou: {miou:.5f}]'
            .format(**best_record))
        print(
            '------------------------------------------------------------------------------------------------------'
        )
Beispiel #6
0
    help="Name of the dataset: ['facades', 'maps', 'cityscapes']")
# NOTE(review): this fragment begins mid parser.add_argument(...) call; the
# parser definition and the call's opening lines are outside this chunk.
parser.add_argument("--batch_size",
                    type=int,
                    default=1,
                    help="Size of the batches")
parser.add_argument("--lr",
                    type=float,
                    default=0.0002,
                    help="Adams learning rate")
args = parser.parse_args()

# Prefer the first GPU when available, otherwise fall back to CPU.
device = ('cuda:0' if torch.cuda.is_available() else 'cpu')

# Resize to 256x256, tensorize, and scale pixels from [0, 1] to [-1, 1].
transforms = T.Compose([
    T.Resize((256, 256)),
    T.ToTensor(),
    T.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])
# models
print('Defining models!')
generator = UnetGenerator().to(device)
discriminator = ConditionalDiscriminator().to(device)
# optimizers: both use the same LR and the common GAN betas (0.5, 0.999).
g_optimizer = torch.optim.Adam(generator.parameters(),
                               lr=args.lr,
                               betas=(0.5, 0.999))
d_optimizer = torch.optim.Adam(discriminator.parameters(),
                               lr=args.lr,
                               betas=(0.5, 0.999))
# loss functions
g_criterion = GeneratorLoss(alpha=100)
Beispiel #7
0
    print ("Preprocessing..")
    preprocessing()
    print ("Preprocessing finished!")

cuda_available = torch.cuda.is_available()

# directory results
if not os.path.exists(RESULTS_PATH):
    os.makedirs(RESULTS_PATH)

# Load dataset
# NOTE(review): m and s are defined outside this chunk — presumably
# precomputed channel mean/std; verify against the preprocessing step.
mean=m
std_dev=s

transform = transforms.Compose([transforms.Resize((224,224)),
                                    transforms.ToTensor(),
                                    transforms.Normalize(mean, std_dev)])

training_set = LocalDataset(IMAGES_PATH, TRAINING_PATH, transform=transform)
validation_set = LocalDataset(IMAGES_PATH, VALIDATION_PATH, transform=transform)

# Only the training loader shuffles; validation order stays fixed.
training_set_loader = DataLoader(dataset=training_set, batch_size=BATCH_SIZE, num_workers=THREADS, shuffle=True)
validation_set_loader = DataLoader(dataset=validation_set, batch_size=BATCH_SIZE, num_workers=THREADS, shuffle=False)

def train_model(model_name, model, lr=LEARNING_RATE, epochs=EPOCHS, momentum=MOMENTUM, weight_decay=0, train_loader=training_set_loader, test_loader=validation_set_loader):
    """Prepare output directory, loss and SGD optimizer for ``model``.

    Results are written under RESULTS_PATH/<model_name>.
    NOTE(review): the rest of the training loop lies outside this chunk.
    """
    if not os.path.exists(RESULTS_PATH + "/" + model_name):
        os.makedirs(RESULTS_PATH + "/" + model_name)

    criterion = nn.CrossEntropyLoss()
    optimizer = SGD(model.parameters(), lr, momentum=momentum, weight_decay=weight_decay)
Beispiel #8
0
import gc

# directory results
if not os.path.exists(RESULTS_PATH):
    os.makedirs(RESULTS_PATH)

cuda_available = torch.cuda.is_available()

# Load dataset

# pre-computed mean and standard_deviation
mean = torch.Tensor([0.3877, 0.3647, 0.3547])
std_dev = torch.Tensor([0.2121, 0.2106, 0.2119])

# Center-crop to 224x224, tensorize, then normalize with the stats above.
transform = transforms.Compose([transforms.CenterCrop(224),
                                    transforms.ToTensor(),
                                    transforms.Normalize(mean, std_dev)])

training_set = LocalDataset(IMAGES_PATH, TRAINING_PATH, transform=transform)
validation_set = LocalDataset(IMAGES_PATH, VALIDATION_PATH, transform=transform)
test_set = LocalDataset(IMAGES_PATH, TEST_PATH, transform=transform)

# Only the training loader shuffles; val/test keep a fixed order.
training_set_loader = DataLoader(dataset=training_set, batch_size=BATCH_SIZE, num_workers=THREADS, shuffle=True)
validation_set_loader = DataLoader(dataset=validation_set, batch_size=BATCH_SIZE, num_workers=THREADS, shuffle=False)
test_set_loader = DataLoader(dataset=test_set, batch_size=BATCH_SIZE, num_workers=THREADS, shuffle=False)

# Regression predicts 4 outputs; classification uses 16 classes.
if REGRESSION:
    classes = {"num_classes": 4}
else:
    classes = {"num_classes": 16}
Beispiel #9
0
import onnx
import onnxruntime
import numpy as np
import torch
import torchvision
from detrac import Detrac
import dataset.transforms as T

# Root of the UA-DETRAC dataset on disk.
root = r"D:\dataset\UA-DETRAC\Detrac_dataset"
transforms = []
transforms.append(T.ToTensor())
transformscompose = T.Compose(transforms)
detrac = Detrac(root, imgformat='jpg', transforms=transformscompose)
# Single-image batch for comparing the torch model against the ONNX export.
img = [detrac[0][0]]

# Validate the exported ONNX graph before creating an inference session.
onnx_model = onnx.load("carmodel2.onnx")
onnx.checker.check_model(onnx_model)
ort_session = onnxruntime.InferenceSession("carmodel2.onnx")

# Rebuild the torch detector and load the trained weights on CPU.
checkpoint = torch.load(r"D:\dataset\UA-DETRAC\model_9.pth",
                        map_location='cpu')
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(num_classes=5,
                                                             pretrained=False)
model.load_state_dict(checkpoint['model'])
model.eval()
torch_out = model(img)
print(torch_out)
def to_numpy(tensor):
    return tensor.detach().cpu().numpy(
Beispiel #10
0
def inference(args):
    """Predict labels for the target-domain test set and write them to a CSV.

    The source domain is derived from ``args.target``; the matching adapted
    model checkpoint is loaded from ./p3/result/3_2/.
    """
    # Each target domain is paired with one fixed source domain.
    if args.target=='mnistm':
        args.source = 'usps'
    elif args.target=='usps':
        args.source = 'svhn'
    elif args.target=='svhn':
        args.source = 'mnistm'
    else:
        raise NotImplementedError(f"{args.target}: not implemented!")

    # Resize, force 3 channels, tensorize, scale to [-1, 1].
    t1 = transforms.Compose([
        transforms.Resize(args.img_size),
        transforms.Grayscale(3),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])

    valid_target_dataset = Digits_Dataset_Test(args.dataset_path, t1)
    valid_target_dataloader = DataLoader(valid_target_dataset,
                                         batch_size=512,
                                         num_workers=6)

    checkpoint = torch.load(
        f"./p3/result/3_2/{args.source}2{args.target}/best_model.pth",
        map_location='cpu')

    # Feature extractor ('F') and label predictor ('C') in eval mode on GPU.
    feature_extractor = FeatureExtractor()
    feature_extractor.load_state_dict(checkpoint['F'])
    feature_extractor.cuda()
    feature_extractor.eval()

    label_predictor = LabelPredictor()
    label_predictor.load_state_dict(checkpoint['C'])
    label_predictor.cuda()
    label_predictor.eval()

    out_preds = []
    out_fnames = []
    count = 0
    for imgs, fnames in valid_target_dataloader:
        imgs = imgs.cuda()

        class_output = label_predictor(feature_extractor(imgs))
        preds = class_output.max(1)[1].detach().cpu()

        out_preds.append(preds)
        out_fnames += fnames

        count += imgs.size(0)
        print(f"\t [{count}/{len(valid_target_dataloader.dataset)}]",
              end="   \r")

    out_preds = torch.cat(out_preds).cpu().numpy()

    # Write predictions sorted by filename for reproducible output.
    df = pd.DataFrame(data={'image_name': out_fnames, 'label': out_preds})
    df = df.sort_values('image_name')
    df.to_csv(args.out_csv, index=False)
    print(f' [Info] finish predicting {args.dataset_path}')
Beispiel #11
0
                        default='cuda:0',
                        help='cpu or cuda:0 or cuda:1')

    # NOTE(review): this fragment starts inside a cut-off parse_args helper;
    # parsing `string` allows programmatic argument injection for testing.
    args = parser.parse_args() if string is None else parser.parse_args(string)
    return args


if __name__ == '__main__':

    args = parse_args()

    # Log configuration and metrics to Weights & Biases.
    wandb.init(config=args, project='dlcv_gan_face')

    # Resize, random flip, tensorize, then scale RGB from [0, 1] to [-1, 1].
    transform = transforms.Compose([
        transforms.Resize(args.img_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.5] * 3, [0.5] * 3)
    ])
    train_dataset = Face_Dataset('../hw3_data/face/train', transform)
    valid_dataset = Face_Dataset('../hw3_data/face/test', transform)
    train_dataloader = DataLoader(train_dataset,
                                  batch_size=args.batch,
                                  shuffle=True,
                                  num_workers=args.num_workers)
    valid_dataloader = DataLoader(valid_dataset,
                                  batch_size=args.batch,
                                  num_workers=args.num_workers)

    train(args, train_dataloader, valid_dataloader)
Beispiel #12
0
    # others
    parser.add_argument('--device',
                        type=str,
                        default='cuda:0',
                        help='cpu or cuda:0 or cuda:1')

    # NOTE(review): this fragment starts inside a cut-off parse_args helper;
    # parsing `string` allows programmatic argument injection for testing.
    args = parser.parse_args() if string is None else parser.parse_args(string)
    return args


if __name__ == '__main__':

    args = parse_args()

    # Log configuration and metrics to Weights & Biases.
    wandb.init(config=args, project='dlcv_gan_face')

    # Random flip then tensorize; no resize or normalization in this variant.
    transform = transforms.Compose(
        [transforms.RandomHorizontalFlip(),
         transforms.ToTensor()])
    train_dataset = Face_Dataset('../hw3_data/face/train', transform)
    valid_dataset = Face_Dataset('../hw3_data/face/test', transform)
    train_dataloader = DataLoader(train_dataset,
                                  batch_size=args.batch,
                                  shuffle=True,
                                  num_workers=args.num_workers)
    valid_dataloader = DataLoader(valid_dataset,
                                  batch_size=args.batch,
                                  num_workers=args.num_workers)

    train(args, train_dataloader, valid_dataloader)
    def transform_val(self, input_data):
        """Validation-time transform: crop, tensorize and assemble model inputs.

        ``input_data`` provides "image", "lidar_depth", "radar_depth" and
        optionally "index_map" plus filtering masks.  Returns a dict with the
        cropped tensors and the network inputs chosen by ``self.modality`` /
        ``self.sparsifier``.
        """
        rgb = np.array(input_data["image"]).astype(np.float32)
        lidar_depth = np.array(input_data["lidar_depth"]).astype(np.float32)
        radar_depth = np.array(input_data["radar_depth"]).astype(np.float32)
        if 'index_map' in input_data.keys():
            # np.int was removed in NumPy 1.24; the builtin int is equivalent.
            index_map = np.array(input_data["index_map"]).astype(int)

        # Then, we add model-aware resizing
        if self.transform_mode == "DORN":
            if cfg.scaling is True:
                h, w, _ = tuple((np.array(rgb.shape)).astype(np.int32))
            else:
                h, w, _ = tuple((np.array(rgb.shape) * 0.5).astype(np.int32))

            h_new = self.t_cfg.crop_size_train[0]
            w_new = w
            resize_image_method = transforms.Resize([h_new, w_new],
                                                    interpolation="bilinear")
            resize_depth_method = transforms.Resize([h_new, w_new],
                                                    interpolation="nearest")
        elif self.transform_mode == "sparse-to-dense":
            h_new = self.t_cfg.crop_size_train[0]
            w_new = self.t_cfg.crop_size_train[1]
            resize_image_method = transforms.Resize([h_new, w_new],
                                                    interpolation="bilinear")
            resize_depth_method = transforms.Resize([h_new, w_new],
                                                    interpolation="nearest")

        # Validation only center-crops; the resize steps stay disabled.
        transform_rgb = transforms.Compose([
            # resize_image_method,
            transforms.CenterCrop(self.t_cfg.crop_size_val)
        ])
        transform_depth = transforms.Compose([
            # resize_depth_method,
            transforms.CenterCrop(self.t_cfg.crop_size_val)
        ])

        rgb = transform_rgb(rgb)
        rgb = rgb / 255.
        lidar_depth = transform_depth(lidar_depth)

        rgb = np.array(rgb).astype(np.float32)
        lidar_depth = np.array(lidar_depth).astype(np.float32)

        rgb = to_tensor(rgb)
        lidar_depth = to_tensor(lidar_depth)

        radar_depth = transform_depth(radar_depth)
        radar_depth = np.array(radar_depth).astype(np.float32)
        radar_depth = to_tensor(radar_depth)

        # Perform transform on index map
        if 'index_map' in input_data.keys():
            index_map = transform_depth(index_map)
            # np.int was removed in NumPy 1.24; the builtin int is equivalent.
            index_map = np.array(index_map).astype(int)
            index_map = to_tensor(index_map)
            index_map = index_map.unsqueeze(0)

        # Normalize to imagenet mean and std
        if self.transform_mode == "DORN":
            rgb = transforms.normalization_imagenet(rgb)

        ####################
        ## Filtering part ##
        ####################
        if self.sparsifier == "radar_filtered":
            # Indicating the invalid entries
            invalid_mask = ~input_data['valid_mask']
            invalid_index = np.where(invalid_mask)[0]
            invalid_index_mask = invalid_index[None, None,
                                               ...].transpose(2, 0, 1)

            # Constructing mask for dense depth
            dense_mask = torch.ByteTensor(
                np.sum(index_map.numpy() == invalid_index_mask, axis=0))
            radar_depth_filtered = radar_depth.clone()
            radar_depth_filtered[dense_mask.to(torch.bool)] = 0.
            radar_depth_filtered = radar_depth_filtered.unsqueeze(0)
            ####################

        ######################################
        ## Filtering using predicted labels ##
        ######################################
        if self.sparsifier == "radar_filtered2":
            invalid_mask = ~input_data['pred_labels']
            invalid_index = np.where(invalid_mask)[0]
            invalid_index_mask = invalid_index[None, None,
                                               ...].transpose(2, 0, 1)

            dense_mask = torch.ByteTensor(
                np.sum(index_map.numpy() == invalid_index_mask, axis=0))
            radar_depth_filtered2 = radar_depth.clone()
            radar_depth_filtered2[dense_mask.to(torch.bool)] = 0.
            radar_depth_filtered2 = radar_depth_filtered2.unsqueeze(0)
            ######################################

        lidar_depth = lidar_depth.unsqueeze(0)
        radar_depth = radar_depth.unsqueeze(0)

        # Return different data for different modality
        ################ Input sparsifier #########
        if self.modality == "rgb":
            inputs = rgb
        elif self.modality == "rgbd":
            if self.sparsifier == "radar":
                # Filter out the the points exceeding max_depth
                mask = (radar_depth > self.max_depth)
                radar_depth[mask] = 0
                inputs = torch.cat((rgb, radar_depth), dim=0)
            elif self.sparsifier == "radar_filtered":
                # Filter out the points exceeding max_depth
                mask = (radar_depth_filtered > self.max_depth)
                radar_depth_filtered[mask] = 0
                inputs = torch.cat((rgb, radar_depth_filtered), dim=0)
            # Using the learned classifyer
            elif self.sparsifier == "radar_filtered2":
                # Filter out the points exceeding max_depth
                mask = (radar_depth_filtered2 > self.max_depth)
                radar_depth_filtered2[mask] = 0
                inputs = torch.cat((rgb, radar_depth_filtered2), dim=0)
            else:
                s_depth = self.get_sparse_depth(lidar_depth, radar_depth)
                inputs = torch.cat((rgb, s_depth), dim=0)
        else:
            raise ValueError("[Error] Unsupported modality. Consider ",
                             self.avail_modality)
        labels = lidar_depth

        output_dict = {
            "rgb": rgb,
            "lidar_depth": lidar_depth,
            "radar_depth": radar_depth,
            "inputs": inputs,
            "labels": labels
        }

        if self.sparsifier == "radar_filtered":
            output_dict["radar_depth_filtered"] = radar_depth_filtered

        if self.sparsifier == "radar_filtered2":
            output_dict["radar_depth_filtered2"] = radar_depth_filtered2

        # For 'index_map' compatibility
        if 'index_map' in input_data.keys():
            output_dict["index_map"] = index_map

        return output_dict
    def transform_train(self, input_data):
        """Training-time transform: random scale/rotate/crop/flip plus filtering.

        ``input_data`` provides "image", "lidar_depth", "radar_depth" and
        optionally "index_map" plus filtering masks.  Depth values are divided
        by the scale factor so metric depth stays consistent after resizing.
        Returns a dict with the augmented tensors and assembled model inputs.
        """
        # Fetch the data
        rgb = np.array(input_data["image"]).astype(np.float32)
        lidar_depth = np.array(input_data["lidar_depth"]).astype(np.float32)
        radar_depth = np.array(input_data["radar_depth"]).astype(np.float32)
        if 'index_map' in input_data.keys():
            # np.int was removed in NumPy 1.24; the builtin int is equivalent.
            index_map = np.array(input_data["index_map"]).astype(int)

        # Define augmentation factor
        scale_factor = np.random.uniform(
            self.t_cfg.scale_factor_train[0],
            self.t_cfg.scale_factor_train[1])  # random scaling
        angle_factor = np.random.uniform(
            -self.t_cfg.rotation_factor,
            self.t_cfg.rotation_factor)  # random rotation degrees
        flip_factor = np.random.uniform(0.0,
                                        1.0) < 0.5  # random horizontal flip

        # Compose customized transform for RGB and Depth separately
        color_jitter = transforms.ColorJitter(0.2, 0.2, 0.2)
        resize_image = transforms.Resize(scale_factor,
                                         interpolation="bilinear")
        resize_depth = transforms.Resize(scale_factor, interpolation="nearest")

        # Then, we add model-aware resizing
        if self.transform_mode == "DORN":
            if cfg.scaling is True:
                h, w, _ = tuple((np.array(rgb.shape)).astype(np.int32))
            else:
                h, w, _ = tuple((np.array(rgb.shape) * 0.5).astype(np.int32))

            h_new = self.t_cfg.crop_size_train[0]
            w_new = w
            resize_image_method = transforms.Resize([h_new, w_new],
                                                    interpolation="bilinear")
            resize_depth_method = transforms.Resize([h_new, w_new],
                                                    interpolation="nearest")
        elif self.transform_mode == "sparse-to-dense":
            h_new = self.t_cfg.crop_size_train[0]
            w_new = self.t_cfg.crop_size_train[1]
            resize_image_method = transforms.Resize([h_new, w_new],
                                                    interpolation="bilinear")
            resize_depth_method = transforms.Resize([h_new, w_new],
                                                    interpolation="nearest")

        # Get the border of random crop
        # NOTE(review): h_new/w_new are only assigned in the two branches
        # above — any other transform_mode would raise NameError here.
        h_scaled, w_scaled = math.floor(h_new * scale_factor), math.floor(
            (w_new * scale_factor))
        h_bound, w_bound = h_scaled - self.t_cfg.crop_size_train[
            0], w_scaled - self.t_cfg.crop_size_train[1]
        h_startpoint = round(np.random.uniform(0, h_bound))
        w_startpoint = round(np.random.uniform(0, w_bound))

        # Compose the transforms for RGB
        transform_rgb = transforms.Compose([
            transforms.Rotate(angle_factor), resize_image,
            transforms.Crop(h_startpoint, w_startpoint,
                            self.t_cfg.crop_size_train[0],
                            self.t_cfg.crop_size_train[1]),
            transforms.HorizontalFlip(flip_factor)
        ])

        # Compose the transforms for Depth (same geometry, nearest resize)
        transform_depth = transforms.Compose([
            transforms.Rotate(angle_factor), resize_depth,
            transforms.Crop(h_startpoint, w_startpoint,
                            self.t_cfg.crop_size_train[0],
                            self.t_cfg.crop_size_train[1]),
            transforms.HorizontalFlip(flip_factor)
        ])

        # Perform transform on rgb data
        # ToDo: whether we need to - imagenet mean here
        rgb = transform_rgb(rgb)
        rgb = color_jitter(rgb)
        rgb = rgb / 255.

        # Perform transform on lidar depth data
        lidar_depth /= float(scale_factor)
        lidar_depth = transform_depth(lidar_depth)

        rgb = np.array(rgb).astype(np.float32)
        lidar_depth = np.array(lidar_depth).astype(np.float32)

        rgb = to_tensor(rgb)
        lidar_depth = to_tensor(lidar_depth)

        # Perform transform on radar depth data
        radar_depth /= float(scale_factor)
        radar_depth = transform_depth(radar_depth)

        radar_depth = np.array(radar_depth).astype(np.float32)
        radar_depth = to_tensor(radar_depth)

        # Perform transform on index map
        if 'index_map' in input_data.keys():
            index_map = transform_depth(index_map)
            # np.int was removed in NumPy 1.24; the builtin int is equivalent.
            index_map = np.array(index_map).astype(int)
            index_map = to_tensor(index_map)
            index_map = index_map.unsqueeze(0)

        # Normalize rgb using imagenet mean and std
        # ToDo: only do imagenet normalization on DORN
        if self.transform_mode == "DORN":
            rgb = transforms.normalization_imagenet(rgb)

        if self.sparsifier == "radar_filtered":
            ####################
            ## Filtering part ##
            ####################
            # Indicating the invalid entries
            invalid_mask = ~input_data['valid_mask']
            invalid_index = np.where(invalid_mask)[0]
            invalid_index_mask = invalid_index[None, None,
                                               ...].transpose(2, 0, 1)

            # Constructing mask for dense depth
            dense_mask = torch.ByteTensor(
                np.sum(index_map.numpy() == invalid_index_mask, axis=0))
            radar_depth_filtered = radar_depth.clone()
            radar_depth_filtered[dense_mask.to(torch.bool)] = 0.
            radar_depth_filtered = radar_depth_filtered.unsqueeze(0)

        if self.sparsifier == "radar_filtered2":
            ######################################
            ## Filtering using predicted labels ##
            ######################################
            invalid_mask = ~input_data['pred_labels']
            invalid_index = np.where(invalid_mask)[0]
            invalid_index_mask = invalid_index[None, None,
                                               ...].transpose(2, 0, 1)

            dense_mask = torch.ByteTensor(
                np.sum(index_map.numpy() == invalid_index_mask, axis=0))
            radar_depth_filtered2 = radar_depth.clone()
            radar_depth_filtered2[dense_mask.to(torch.bool)] = 0.
            radar_depth_filtered2 = radar_depth_filtered2.unsqueeze(0)
            ######################################

        lidar_depth = lidar_depth.unsqueeze(0)
        radar_depth = radar_depth.unsqueeze(0)

        # Return different data for different modality
        if self.modality == "rgb":
            inputs = rgb
        elif self.modality == "rgbd":
            if self.sparsifier == "radar":
                # Filter out the the points exceeding max_depth
                mask = (radar_depth > self.max_depth)
                radar_depth[mask] = 0
                inputs = torch.cat((rgb, radar_depth), dim=0)
            # Using the generated groundtruth
            elif self.sparsifier == "radar_filtered":
                # Filter out the points exceeding max_depth
                mask = (radar_depth_filtered > self.max_depth)
                radar_depth_filtered[mask] = 0
                inputs = torch.cat((rgb, radar_depth_filtered), dim=0)
            # Using the learned classifyer
            elif self.sparsifier == "radar_filtered2":
                # Filter out the points exceeding max_depth
                mask = (radar_depth_filtered2 > self.max_depth)
                radar_depth_filtered2[mask] = 0
                inputs = torch.cat((rgb, radar_depth_filtered2), dim=0)
            else:
                s_depth = self.get_sparse_depth(lidar_depth, radar_depth)
                inputs = torch.cat((rgb, s_depth), dim=0)
        else:
            raise ValueError("[Error] Unsupported modality. Consider ",
                             self.avail_modality)
        labels = lidar_depth

        # Gathering output results
        output_dict = {
            "rgb": rgb,
            "lidar_depth": lidar_depth,
            "radar_depth": radar_depth,
            "inputs": inputs,
            "labels": labels
        }
        if self.sparsifier == "radar_filtered":
            output_dict["radar_depth_filtered"] = radar_depth_filtered

        if self.sparsifier == "radar_filtered2":
            output_dict["radar_depth_filtered2"] = radar_depth_filtered2

        if 'index_map' in input_data.keys():
            output_dict["index_map"] = index_map

        return output_dict
Beispiel #15
0
# trainloader = torch.utils.data.DataLoader(CSDataSet(args.data_dir, './dataset/list/cityscapes/train.lst', max_iters=args.num_steps*args.batch_size, crop_size=(h, w),
#                 scale=args.random_scale, mirror=args.random_mirror, mean=IMG_MEAN),
#                 batch_size=args.batch_size, shuffle=True, num_workers=4, pin_memory=True)
# valloader = torch.utils.data.DataLoader(CSDataSet(args.data_dir, './dataset/list/cityscapes/val.lst', crop_size=(1024, 2048), mean=IMG_MEAN, scale=False, mirror=False),
#                                 batch_size=2, shuffle=False, pin_memory=True)

# ImageNet mean/std rescaled to the 0-255 pixel range, since ToTensor here
# does not divide by 255 (see the comments on the Compose steps below).
# NOTE(review): value_scale is assigned but the literal 255 is used below.
value_scale = 255
mean = [0.485, 0.456, 0.406]
mean = [item * 255 for item in mean]
std = [0.229, 0.224, 0.225]
std = [item * 255 for item in std]
train_transform = my_trans.Compose([
    # my_trans.Resize((args.height, args.width)),
    # my_trans.RandScale([0.5, 2.0]),
    # my_trans.RandomGaussianBlur(),
    my_trans.RandomHorizontalFlip(),
    # my_trans.Crop([args.height, args.width],crop_type='rand', padding=mean, ignore_label=255),
    my_trans.ToTensor(),  # without div 255
    my_trans.Normalize(mean=mean, std=std)
])
val_transform = my_trans.Compose([
    # my_trans.Resize((args.height, args.width)),
    my_trans.ToTensor(),  # without div 255
    my_trans.Normalize(mean=mean, std=std)
])

data_dir = '/data/zzg/CamVid/'
train_dataset = CamVid(data_dir,
                       mode='train',
                       p=None,
                       transform=train_transform)
Beispiel #16
0
def main():
    """Train a WRN-28-2 with a semi-supervised (MixMatch-style) objective.

    Relies on module-level globals defined elsewhere in this script:
    ``args`` (parsed CLI options), ``state`` (used when printing the LR),
    ``use_cuda`` and ``best_acc``.
    NOTE(review): also assumes ``train``, ``validate``, ``save_checkpoint``
    and ``mkdir_p`` helpers exist in this file — confirm against full source.
    """
    global best_acc

    if not os.path.isdir(args.out):
        mkdir_p(args.out)

    # Data
    print(f'==> Preparing cifar10')
    # Training-time augmentation; note there is no Normalize step here.
    transform_train = transforms.Compose([
        transforms.RandomCrop(32),
        transforms.RandomFlip(),
        transforms.ToTensor(),
    ])

    # Deterministic preprocessing for validation/test.
    transform_val = transforms.Compose([
        transforms.CenterCrop(32),
        transforms.ToTensor(),
    ])

    # Split CIFAR-10 into labeled / unlabeled / val / test subsets
    # (the third return value is unused here).
    train_labeled_set, train_unlabeled_set, _, val_set, test_set = dataset.get_cifar10(
        './data',
        args.n_labeled,
        args.outdata,
        transform_train=transform_train,
        transform_val=transform_val)

    # drop_last keeps labeled and unlabeled batch counts aligned per epoch.
    labeled_trainloader = data.DataLoader(train_labeled_set,
                                          batch_size=args.batch_size,
                                          shuffle=True,
                                          num_workers=0,
                                          drop_last=True)
    unlabeled_trainloader = data.DataLoader(train_unlabeled_set,
                                            batch_size=args.batch_size,
                                            shuffle=True,
                                            num_workers=0,
                                            drop_last=True)
    val_loader = data.DataLoader(val_set,
                                 batch_size=args.batch_size,
                                 shuffle=False,
                                 num_workers=0)
    test_loader = data.DataLoader(test_set,
                                  batch_size=args.batch_size,
                                  shuffle=False,
                                  num_workers=0)

    # Model
    print("==> creating WRN-28-2")

    def create_model(ema=False):
        # When building the EMA copy, detach its parameters from autograd:
        # WeightEMA updates them manually, never via backprop.
        model = models.WideResNet(num_classes=10)
        model = model.cuda()

        if ema:
            for param in model.parameters():
                param.detach_()

        return model

    model = create_model()
    ema_model = create_model(ema=True)

    cudnn.benchmark = True
    print('    Total params: %.2fM' %
          (sum(p.numel() for p in model.parameters()) / 1000000.0))

    # Semi-supervised loss on (labeled, unlabeled) pairs; plain CE for eval.
    train_criterion = SemiLoss()
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    # Maintains the exponential moving average of the weights in ema_model.
    ema_optimizer = WeightEMA(model, ema_model, alpha=args.ema_decay)
    start_epoch = 0

    # Resume
    title = 'noisy-cifar-10'
    if args.resume:
        # Load checkpoint.
        print('==> Resuming from checkpoint..')
        assert os.path.isfile(
            args.resume), 'Error: no checkpoint directory found!'
        # Continue logging into the directory the checkpoint came from.
        args.out = os.path.dirname(args.resume)
        checkpoint = torch.load(args.resume)
        best_acc = checkpoint['best_acc']
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        ema_model.load_state_dict(checkpoint['ema_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        logger = Logger(os.path.join(args.out, 'log.txt'),
                        title=title,
                        resume=True)
    else:
        logger = Logger(os.path.join(args.out, 'log.txt'), title=title)
        logger.set_names([
            'Train Loss', 'Train Loss X', 'Train Loss U', 'Valid Loss',
            'Valid Acc.', 'Test Loss', 'Test Acc.'
        ])

    writer = SummaryWriter(args.out)
    step = 0
    test_accs = []
    # Train and val
    for epoch in range(start_epoch, args.epochs):

        print('\nEpoch: [%d | %d] LR: %f' %
              (epoch + 1, args.epochs, state['lr']))

        train_loss, train_loss_x, train_loss_u = train(
            labeled_trainloader, unlabeled_trainloader, model, optimizer,
            ema_optimizer, train_criterion, epoch, use_cuda)
        # All evaluation uses the EMA model, not the raw model.
        _, train_acc = validate(labeled_trainloader,
                                ema_model,
                                criterion,
                                epoch,
                                use_cuda,
                                mode='Train Stats')
        val_loss, val_acc = validate(val_loader,
                                     ema_model,
                                     criterion,
                                     epoch,
                                     use_cuda,
                                     mode='Valid Stats')
        test_loss, test_acc = validate(test_loader,
                                       ema_model,
                                       criterion,
                                       epoch,
                                       use_cuda,
                                       mode='Test Stats ')

        # Global step for TensorBoard = iterations completed so far.
        step = args.val_iteration * (epoch + 1)

        writer.add_scalar('losses/train_loss', train_loss, step)
        writer.add_scalar('losses/valid_loss', val_loss, step)
        writer.add_scalar('losses/test_loss', test_loss, step)

        writer.add_scalar('accuracy/train_acc', train_acc, step)
        writer.add_scalar('accuracy/val_acc', val_acc, step)
        writer.add_scalar('accuracy/test_acc', test_acc, step)

        # append logger file
        logger.append([
            train_loss, train_loss_x, train_loss_u, val_loss, val_acc,
            test_loss, test_acc
        ])

        # save model
        # Model selection is by validation accuracy of the EMA model.
        is_best = val_acc > best_acc
        best_acc = max(val_acc, best_acc)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'ema_state_dict': ema_model.state_dict(),
                'acc': val_acc,
                'best_acc': best_acc,
                'optimizer': optimizer.state_dict(),
            }, is_best)
        test_accs.append(test_acc)
    logger.close()
    writer.close()

    print('Mean acc:')
    # Report test accuracy averaged over (at most) the last 20 epochs.
    print(np.mean(test_accs[-20:]))
Beispiel #17
0
def get_transform(train):
    """Build the image transform pipeline for detection data.

    Args:
        train: when True, add random horizontal flipping (p=0.5) for
            data augmentation; evaluation gets only tensor conversion.

    Returns:
        A ``T.Compose`` over the selected transforms.
    """
    # Renamed from `transforms`: the old local name shadowed the
    # commonly-imported torchvision `transforms` module.
    transform_list = [T.ToTensor()]
    if train:
        transform_list.append(T.RandomHorizontalFlip(0.5))
    return T.Compose(transform_list)
Beispiel #18
0
    args = parser.parse_args() if string is None else parser.parse_args(string)
    return args 
    
if __name__ == '__main__':

    args = parse_args()

    # Track this domain-adaptation run (source -> target) in wandb.
    wandb.init(config=args,
               project=f'dlcv_naive_{args.source}2{args.target}')

    # All digit images are resized to a common square resolution.
    size = 64

    # Augmented pipeline used for training images: jitter, small rotation,
    # 3-channel grayscale, then normalisation to [-1, 1].
    t0 = transforms.Compose([
        transforms.Resize(size),
        transforms.ColorJitter(),
        transforms.RandomRotation(15, fill=(0,)),
        transforms.Grayscale(3),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])

    # Plain pipeline (no augmentation) for evaluation images.
    t1 = transforms.Compose([
        transforms.Resize(size),
        transforms.Grayscale(3),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])

    root = '../hw3_data/digits/'

    # dataset: both domains' training splits share the augmented transform.
    source, target = args.source, args.target
    train_source_dataset = Digits_Dataset(root + f'{source}/train', source, t0)
    train_target_dataset = Digits_Dataset(root + f'{target}/train', target, t0)
Beispiel #19
0
    print("Preprocessing finished!")

cuda_available = torch.cuda.is_available()

# directory results
# FIX: exist_ok=True replaces the exists()/makedirs() pair, which had a
# check-then-create race and an extra stat call.
os.makedirs(RESULTS_PATH, exist_ok=True)

# Load dataset
# Channel statistics computed elsewhere in this script (m, s) — presumably
# from a preprocessing pass over the training images; verify upstream.
mean = m
std_dev = s

# Training pipeline: mild colour jitter applied with probability 0.5,
# resize to the 224x224 input of ImageNet-style backbones, then
# tensor conversion and normalisation.
transform_train = transforms.Compose([
    transforms.RandomApply([transforms.ColorJitter(0.1, 0.1, 0.1, 0.1)],
                           p=0.5),
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean, std_dev)
])

# Evaluation pipeline: deterministic resize + normalisation only.
transform_test = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean, std_dev)
])

training_set = LocalDataset(IMAGES_PATH,
                            TRAINING_PATH,
                            transform=transform_train)
validation_set = LocalDataset(IMAGES_PATH,
                              VALIDATION_PATH,
Beispiel #20
0
                        type=str,
                        default='cuda:0',
                        help='cpu or cuda:0 or cuda:1')

    args = parser.parse_args()
    return args


if __name__ == '__main__':

    args = parse_args()

    wandb.init(config=args, project='dlcv_face_vae')

    train_transform = transforms.Compose([
        #transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
    ])
    valid_transform = transforms.Compose([
        transforms.ToTensor(),
    ])

    train_dataset = Face_Dataset('../hw3_data/face/train', train_transform)
    valid_dataset = Face_Dataset('../hw3_data/face/test', valid_transform)

    train_dataloader = DataLoader(train_dataset,
                                  batch_size=args.batch,
                                  shuffle=True,
                                  num_workers=8)
    valid_dataloader = DataLoader(valid_dataset,
                                  batch_size=args.batch * 2,
                                  shuffle=False,