Example #1
def get_dataset(options):
    # Build the test-split dataloader for the requested dataset
    if options.dataset == 'miniImageNet':
        from torchmeta.datasets import MiniImagenet
        mean_pix = [
            x / 255 for x in [129.37731888, 124.10583864, 112.47758569]
        ]
        std_pix = [x / 255 for x in [68.20947949, 65.43124043, 70.45866994]]
        if options.network == 'ResNet18':
            transform = Compose([
                Resize(224),
                ToTensor(),
                Normalize(mean=mean_pix, std=std_pix),
            ])
        else:
            transform = Compose([
                ToTensor(),
                Normalize(mean=mean_pix, std=std_pix),
            ])
        dataset_test = MiniImagenet(
            "data",
            num_classes_per_task=options.way,
            transform=transform,
            target_transform=Categorical(num_classes=options.way),
            meta_val=True,
            download=False)
        dataset_test = ClassSplitter(dataset_test,
                                     shuffle=True,
                                     num_train_per_class=options.shot,
                                     num_test_per_class=options.query)
        dataloader_test = BatchMetaDataLoader(dataset_test,
                                              batch_size=1,
                                              num_workers=options.num_workers)
    elif options.dataset == 'tieredImageNet':
        from torchmeta.datasets import TieredImagenet
        mean_pix = [
            x / 255 for x in [129.37731888, 124.10583864, 112.47758569]
        ]
        std_pix = [x / 255 for x in [68.20947949, 65.43124043, 70.45866994]]
        dataset_test = TieredImagenet(
            "data",
            num_classes_per_task=options.way,
            transform=Compose([
                ToTensor(),
                Normalize(mean=mean_pix, std=std_pix),
            ]),
            target_transform=Categorical(num_classes=options.way),
            meta_test=True,
            download=True)
        dataset_test = ClassSplitter(dataset_test,
                                     shuffle=True,
                                     num_train_per_class=options.shot,
                                     num_test_per_class=options.query)
        dataloader_test = BatchMetaDataLoader(dataset_test,
                                              batch_size=1,
                                              num_workers=options.num_workers)
    elif options.dataset == 'CIFAR_FS':
        from torchmeta.datasets import CIFARFS
        mean_pix = [
            x / 255.0 for x in [129.37731888, 124.10583864, 112.47758569]
        ]
        std_pix = [x / 255.0 for x in [68.20947949, 65.43124043, 70.45866994]]
        dataset_test = CIFARFS(
            "data",
            num_classes_per_task=options.way,
            transform=Compose([
                ToTensor(),
                Normalize(mean=mean_pix, std=std_pix),
            ]),
            target_transform=Categorical(num_classes=options.way),
            meta_test=True,
            download=True)
        dataset_test = ClassSplitter(dataset_test,
                                     shuffle=True,
                                     num_train_per_class=options.shot,
                                     num_test_per_class=options.query)
        dataloader_test = BatchMetaDataLoader(dataset_test,
                                              batch_size=1,
                                              num_workers=options.num_workers)
    elif options.dataset == 'FC100':
        from torchmeta.datasets import FC100
        mean_pix = [
            x / 255.0 for x in [129.37731888, 124.10583864, 112.47758569]
        ]
        std_pix = [x / 255.0 for x in [68.20947949, 65.43124043, 70.45866994]]
        dataset_test = FC100(
            "data",
            num_classes_per_task=options.way,
            transform=Compose([
                ToTensor(),
                Normalize(mean=mean_pix, std=std_pix),
            ]),
            target_transform=Categorical(num_classes=options.way),
            meta_test=True,
            download=True)
        dataset_test = ClassSplitter(dataset_test,
                                     shuffle=True,
                                     num_train_per_class=options.shot,
                                     num_test_per_class=options.query)
        dataloader_test = BatchMetaDataLoader(dataset_test,
                                              batch_size=1,
                                              num_workers=options.num_workers)
    else:
        raise ValueError(f"Unrecognized dataset type: {options.dataset}")

    return dataloader_test
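
A minimal usage sketch for the loader above (field values are placeholders; it assumes an argparse-style namespace carrying the attributes the function reads):

import argparse

options = argparse.Namespace(dataset='CIFAR_FS', network='ProtoNet',
                             way=5, shot=1, query=15, num_workers=4)
dataloader_test = get_dataset(options)
for batch in dataloader_test:
    # torchmeta batches are dicts holding the support ('train') and query ('test') sets
    support_inputs, support_targets = batch['train']
    query_inputs, query_targets = batch['test']
    break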
Example #2
from pathlib import Path

from torchvision.transforms import (Normalize, RandomHorizontalFlip,
                                    RandomResizedCrop, RandomVerticalFlip,
                                    ToTensor)

from common.dataset import get_test_data_loader

SEED = 12345
DEBUG = True

OUTPUT_PATH = "output"
dataset_path = Path(
    "/home/fast_storage/imaterialist-challenge-furniture-2018/")
SAMPLE_SUBMISSION_PATH = dataset_path / "sample_submission_randomlabel.csv"

TEST_TRANSFORMS = [
    RandomResizedCrop(350, scale=(0.7, 1.0), interpolation=3),
    RandomVerticalFlip(p=0.5),
    RandomHorizontalFlip(p=0.5),
    ToTensor(),
    Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
]

N_CLASSES = 128
BATCH_SIZE = 24
NUM_WORKERS = 15

TEST_LOADER = get_test_data_loader(dataset_path=dataset_path / "test",
                                   test_data_transform=TEST_TRANSFORMS,
                                   batch_size=BATCH_SIZE,
                                   num_workers=NUM_WORKERS,
                                   cuda=True)

MODEL = (
    Path(OUTPUT_PATH) / "training_FurnitureInceptionV4_350_20180427_1512" /
    "model_FurnitureInceptionV4_350_13_val_loss=0.5497886.pth").as_posix()
Example #3
        df = main_df

        df['class_id'] = df['target'].map(class_map)

        df['is_manip'] = 0
        df = df[df['target'].notnull()]
        df['to_rotate'] = 0
        return df

    return None


train_transform = Compose([
    albu_trans.RandomCrop(target_size),
    ToTensor(),
    Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

val_transform = Compose([
    albu_trans.CenterCrop(target_size),
    ToTensor(),
    Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])


def add_args(parser):
    arg = parser.add_argument
    arg('--root', default='runs/debug', help='checkpoint root')
    arg('--batch-size', type=int, default=4)
    arg('--n-epochs', type=int, default=30)
    arg('--lr', type=float, default=0.0001)
Example #4
EPOCH = 20
BATCH_SIZE = 64
LR = 0.001
MODEL = "senet"
GPU_NUMS = 0

MODEL_LIST = [
    {
        "name": "alexnet",
        "model": alexnet,
        "pretrained" : False,
        "transform" : Compose([Resize(256),
                               RandomCrop(224),
                               RandomHorizontalFlip(),
                               ToTensor(),
                               Normalize([0.485, 0.456, -.406],[0.229, 0.224, 0.225])
                                                      ])
    },
    {
        "name": "vgg16",
        "model": vgg16,
        "pretrained" : False,
        "transform" : ToTensor()
    },
    {
        "name": "vgg19",
        "model": vgg19,
        "pretrained" : False,
        "transform" : ToTensor()
    },
    {
Example #5
def load(name: str,
         device: Union[str, torch.device] = "cuda"
         if torch.cuda.is_available() else "cpu"):
    if name not in _MODELS:
        raise RuntimeError(
            f"Model {name} not found; available models = {available_models()}")

    model_path = _download(_MODELS[name])
    model = torch.jit.load(model_path, map_location=device).eval()
    n_px = model.input_resolution.item()

    # patch the device names
    device_holder = torch.jit.trace(
        lambda: torch.ones([]).to(torch.device(device)), example_inputs=[])
    device_node = [
        n for n in device_holder.graph.findAllNodes("prim::Constant")
        if "Device" in repr(n)
    ][-1]

    def patch_device(module):
        graphs = [module.graph] if hasattr(module, "graph") else []
        if hasattr(module, "forward1"):
            graphs.append(module.forward1.graph)

        for graph in graphs:
            for node in graph.findAllNodes("prim::Constant"):
                if "value" in node.attributeNames() and str(
                        node["value"]).startswith("cuda"):
                    node.copyAttributes(device_node)

    model.apply(patch_device)
    patch_device(model.encode_image)
    patch_device(model.encode_text)

    # patch dtype to float32 on CPU
    if device == "cpu":
        float_holder = torch.jit.trace(lambda: torch.ones([]).float(),
                                       example_inputs=[])
        float_input = list(float_holder.graph.findNode("aten::to").inputs())[1]
        float_node = float_input.node()

        def patch_float(module):
            graphs = [module.graph] if hasattr(module, "graph") else []
            if hasattr(module, "forward1"):
                graphs.append(module.forward1.graph)

            for graph in graphs:
                for node in graph.findAllNodes("aten::to"):
                    inputs = list(node.inputs())
                    for i in [
                            1, 2
                    ]:  # dtype can be the second or third argument to aten::to()
                        if inputs[i].node()["value"] == 5:
                            inputs[i].node().copyAttributes(float_node)

        model.apply(patch_float)
        patch_float(model.encode_image)
        patch_float(model.encode_text)

        model.float()

    transform = Compose([
        Resize(n_px, interpolation=Image.BICUBIC),
        CenterCrop(n_px),
        lambda image: image.convert("RGB"),
        ToTensor(),
        Normalize((0.48145466, 0.4578275, 0.40821073),
                  (0.26862954, 0.26130258, 0.27577711)),
    ])

    return model, transform
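
A hedged usage sketch for load() ("RN50" is an assumed key of the module's _MODELS dict; the image path is a placeholder):

from PIL import Image

model, transform = load("RN50")
image = transform(Image.open("photo.jpg")).unsqueeze(0)  # 1xCxHxW batch
with torch.no_grad():
    image_features = model.encode_image(image)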
Example #6
def main(args):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    if torch.cuda.is_available():
        print("Using CUDA, benchmarking implementations", file=sys.stderr)
        torch.backends.cudnn.benchmark = True

    # ImageNet statistics (because we use pre-trained model)
    mean, std = [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]

    transform = Compose([
        ToImageMode("RGB"),
        Resize(256),
        CenterCrop(224),
        ToTensor(),
        Normalize(mean=mean, std=std)
    ])

    dataset = ImageDirectory(root=args.dataset, transform=transform)
    dataloader = DataLoader(dataset,
                            batch_size=args.batch_size,
                            num_workers=args.num_workers)

    # Binary classifier on top of resnet50
    model = resnet50()
    model.fc = nn.Linear(model.fc.in_features, 2)

    model = model.to(device)
    model = nn.DataParallel(model)

    # Restore trained weights
    weights = torch.load(str(args.model), map_location=device)
    model.load_state_dict(weights)

    model.eval()

    results = []

    with torch.no_grad():
        for inputs, paths in tqdm(dataloader,
                                  desc="infer",
                                  unit="batch",
                                  ascii=True):
            inputs = inputs.to(device)

            outputs = model(inputs)

            _, preds = torch.max(outputs, dim=1)
            preds = preds.data.cpu().numpy()

            probs = nn.functional.softmax(outputs, dim=1)
            probs = probs.data.cpu().numpy()

            for path, pred, prob in zip(paths, preds, probs):
                result = {
                    "class": pred.item(),
                    "probability": round(prob.max().item(), 3),
                    "path": Path(path).name
                }
                results.append(result)

    JsonIO.save(args.results, results)
Example #7
def main(args):
    model = load_config(args.model)
    dataset = load_config(args.dataset)

    cuda = model["common"]["cuda"]

    device = torch.device("cuda" if cuda else "cpu")

    def map_location(storage, _):
        return storage.cuda() if cuda else storage.cpu()

    if cuda and not torch.cuda.is_available():
        sys.exit("Error: CUDA requested but not available")

    num_classes = len(dataset["common"]["classes"])

    # https://github.com/pytorch/pytorch/issues/7178
    chkpt = torch.load(args.checkpoint, map_location=map_location)

    net = UNet(num_classes).to(device)
    net = nn.DataParallel(net)

    if cuda:
        torch.backends.cudnn.benchmark = True

    net.load_state_dict(chkpt)
    net.eval()

    mean, std = [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]

    transform = Compose([ConvertImageMode(mode="RGB"), ImageToTensor(), Normalize(mean=mean, std=std)])

    directory = BufferedSlippyMapDirectory(args.tiles, transform=transform, size=args.tile_size, overlap=args.overlap)
    loader = DataLoader(directory, batch_size=args.batch_size)

    # don't track tensors with autograd during prediction
    with torch.no_grad():
        for images, tiles in tqdm(loader, desc="Eval", unit="batch", ascii=True):
            images = images.to(device)
            outputs = net(images)

            # manually compute segmentation mask class probabilities per pixel
            probs = nn.functional.softmax(outputs, dim=1).data.cpu().numpy()

            for tile, prob in zip(tiles, probs):
                x, y, z = list(map(int, tile))

                # we predicted on buffered tiles; now get back probs for original image
                prob = directory.unbuffer(prob)

                # Quantize the floating point probabilities in [0,1] to [0,255] and store
                # a single-channel `.png` file with a continuous color palette attached.

                assert prob.shape[0] == 2, "single channel requires binary model"
                assert np.allclose(np.sum(prob, axis=0), 1.), "single channel requires probabilities to sum up to one"
                foreground = prob[1:, :, :]

                anchors = np.linspace(0, 1, 256)
                quantized = np.digitize(foreground, anchors).astype(np.uint8)

                palette = continuous_palette_for_color("pink", 256)

                out = Image.fromarray(quantized.squeeze(), mode="P")
                out.putpalette(palette)

                os.makedirs(os.path.join(args.probs, str(z), str(x)), exist_ok=True)
                path = os.path.join(args.probs, str(z), str(x), str(y) + ".png")

                out.save(path, optimize=True)
Example #8
torch.manual_seed(0)
torch.cuda.manual_seed(0)
torch.backends.cudnn.deterministic = True
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

print("PyTorch version:", torch.__version__)
print("CUDA version:", torch.version.cuda)
print("cuDNN version:", torch.backends.cudnn.version())
print("Using", device)

input_size = 256
frames_per_video = int(sys.argv[2])

mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]
normalize_transform = Normalize(mean, std)

facedet = BlazeFace().to(device)
facedet.load_weights("./helpers/blazeface.pth")
facedet.load_anchors("./helpers/anchors.npy")
_ = facedet.train(False)

video_reader = VideoReader()
video_read_fn = lambda x: video_reader.read_frames(x,
                                                   num_frames=frames_per_video)
face_extractor = FaceExtractor(video_read_fn, facedet)
''' Load and initialize models '''

models = []
weights = []
stack_models = []
Example #9
transform_train = transforms.Compose([
    # RandomApply([cnx_aug_thin_characters()], p=0.2),
    # RandomApply([cnx_aug_bold_characters()], p=0.4),
    # cnd_aug_randomResizePadding(imgH, imgW, min_scale, max_scale, fill=fill_color),
    cnd_aug_resizePadding(imgW, imgH, fill=fill_color),
    RandomApply([cnd_aug_add_line()], p=0.3),
    RandomApply([cnx_aug_blur()], p=0.3),
    ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.1),
    RandomApply([RandomAffine(shear=(-20, 20),
                              translate=(0.0, 0.05),
                              degrees=0,
                              # degrees=2,
                              # scale=(0.8, 1),
                              fillcolor=fill_color)], p=0.3),
    ToTensor(),
    Normalize(mean, std)
])

transform_test = transforms.Compose([
    # cnd_aug_randomResizePadding(imgH, imgW, min_scale, max_scale, fill=fill_color, train=False),
    cnd_aug_resizePadding(imgW, imgH, fill=fill_color, train=False),
    ToTensor(),
    Normalize(mean, std)
])

parser = argparse.ArgumentParser()
parser.add_argument('--root', default=data_dir, help='path to root folder')
parser.add_argument('--train', default=train_file, help='path to train set')
parser.add_argument('--val', default=val_file, help='path to val set')
parser.add_argument('--workers', type=int, help='number of data loading workers', default=workers)
parser.add_argument('--batch_size', type=int, default=batch_size, help='input batch size')
Example #10
def imagenet(root: str,
             img_size: int = 224,
             batch_size: int = 32,
             augment: bool = True,
             workers: int = 6,
             splits: Union[str, Tuple[str]] = ('train', 'val'),
             tiny: bool = False,
             pin_memory: bool = True,
             use_cache: bool = False,
             pre_cache: bool = False) -> Union[DataLoader, List[DataLoader]]:
    """Data loader for the ImageNet dataset.

    Args:
        root: The root directory where the image data is stored. Must contain a `train` and `val` directory with
          training and validation data respectively. If `tiny` is set to True, it must contain a `tiny` directory.
        img_size: The size of the image.
        batch_size: The batch size.
        augment: Whether to use data augmentation techniques.
        workers: The number of CPUs to use for when loading the data from disk.
        splits: Which splits of the data to return. Possible values are `train` and `val`.
        tiny: Whether to use the `Tiny ImageNet dataset <https://tiny-imagenet.herokuapp.com/>`_ instead of the
          full-size data. If True, `root` must contain a `tiny` directory with `train` and `val` directories inside.
        pin_memory: Whether to use PyTorch's `pin_memory` mechanism.
        use_cache: Whether to cache data in a `Cache` object.
        pre_cache: Whether to run caching before the first epoch.

    Returns:
        A list of data loaders for the chosen splits, or a single loader if only one split is requested.
    """
    if tiny:
        root = os.path.join(root, 'tiny')
    train_dir = os.path.join(root, 'train')
    test_dir = os.path.join(root, 'val')

    normalize = Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    val_transform_list = list()
    if not tiny:
        val_transform_list.append(Resize(int(img_size * 8 / 7)))
        val_transform_list.append(CenterCrop(img_size))
    val_transform_list.append(ToTensor())
    val_transform_list.append(normalize)
    val_transform = Compose(val_transform_list)

    train_transform_list = list()
    if tiny:
        train_transform_list.append(RandomCrop(img_size, padding=8))
    else:
        train_transform_list.append(RandomResizedCrop(img_size))
    train_transform_list.append(RandomHorizontalFlip())
    train_transform_list.append(ToTensor())
    train_transform_list.append(normalize)
    train_transform = Compose(train_transform_list)

    loader_list = list()
    if 'train' in splits:
        train_set = ImageFolder(train_dir, train_transform if augment else val_transform)
        train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=workers,
                                  pin_memory=pin_memory)
        loader_list.append(train_loader)

    if 'val' in splits or 'test' in splits:
        val_test_set = ImageFolder(test_dir, val_transform)
        val_set, test_set = torch.utils.data.random_split(val_test_set, [25000, 25000])

        if 'test' in splits:
            if use_cache:
                test_set = Cashed(test_set, img_size, channels=3)
            test_loader = DataLoader(test_set, batch_size=batch_size, num_workers=workers, pin_memory=pin_memory)
            if use_cache and pre_cache:
                print("Caching")
                for _ in tqdm(test_loader):
                    pass
                test_loader.dataset.set_use_cache(True)
                # test_loader.dataset.pin_memory()
            loader_list.append(test_loader)

        if 'val' in splits:
            if use_cache:
                val_set = Cashed(val_set, img_size, channels=3)
            val_loader = DataLoader(val_set, batch_size=batch_size, num_workers=workers, pin_memory=pin_memory)
            if use_cache and pre_cache:
                print("Caching")
                for _ in tqdm(val_loader):
                    pass
                val_loader.dataset.set_use_cache(True)
                # val_loader.dataset.pin_memory()
            loader_list.append(val_loader)

    if len(loader_list) == 1:
        return loader_list[0]
    return loader_list
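
A usage sketch under the docstring's assumptions about the directory layout (the root path is a placeholder):

train_loader, val_loader = imagenet(root='/data/imagenet',  # placeholder path
                                    img_size=224,
                                    batch_size=64,
                                    splits=('train', 'val'))
images, labels = next(iter(train_loader))  # images: 64x3x224x224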
Example #11
def gtsrb(root: str,
          img_size: int = 32,
          batch_size: int = 32,
          workers: int = 6,
          splits: Union[str, Tuple[str]] = ('train', 'val'),
          pin_memory: bool = True) -> Union[LoaderTypes, Cashed, LoaderLists]:
    """Data loader for the `German Traffic Sign Recognition Benchmark
    <http://benchmark.ini.rub.de/?section=gtsrb&subsection=news>`_.

    Args:
        root: The root directory where the image data is stored. Must contain a `train`, `val` and `test` directory with
            training, validation and test data respectively.
        img_size: The size of the image.
        batch_size: The batch size.
        workers: The number of CPUs to use for when loading the data from disk.
        splits: Which splits of the data to return. Possible values are `train`, `val` and `test`.
        pin_memory: Whether to use PyTorch's `pin_memory` mechanism.

    Returns:
        A list of data loaders for the chosen splits, or a single loader if only one split is requested.
    """
    train_dir = os.path.join(root, 'train')
    val_dir = os.path.join(root, 'val')
    test_dir = os.path.join(root, 'test')

    normalize = Normalize([0.34038433, 0.3119956, 0.32119358], [0.05087305, 0.05426421, 0.05859348])
    if img_size > 32:
        val_transform = Compose([Resize(int(img_size * 8 / 7)),
                                 CenterCrop(img_size),
                                 ToTensor(),
                                 normalize])
        train_transform = Compose([RandomResizedCrop(img_size),
                                   RandomAffine(degrees=15, translate=(0.1, 0.1), shear=10),
                                   ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.1),
                                   ToTensor(),
                                   normalize])
    else:
        val_transform = Compose([Resize(img_size + 10),
                                 CenterCrop(img_size),
                                 ToTensor(),
                                 normalize])
        train_transform = Compose([RandomCrop(img_size, padding=4),
                                   RandomAffine(degrees=15, translate=(0.1, 0.1), scale=(0.9, 1.1), shear=10),
                                   ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.1),
                                   ToTensor(),
                                   normalize])

    loader_list = list()
    if 'train' in splits:
        train_set = ImageFolder(train_dir, train_transform)

        weights = list()
        for c in range(43):
            dir_name = f"000{c}" if c > 9 else f"0000{c}"
            weights.append(len(os.listdir(os.path.join(train_dir, dir_name))[:-1]))
        weights = 1 / np.array(weights)
        weights = np.array([weights[t] for t in train_set.targets])
        sampler = torch.utils.data.sampler.WeightedRandomSampler(torch.from_numpy(weights).double(), len(weights))

        train_loader = DataLoader(train_set, batch_size=batch_size, sampler=sampler, num_workers=workers,
                                  pin_memory=pin_memory)
        loader_list.append(train_loader)
    if 'val' in splits:
        val_set = ImageFolder(val_dir, val_transform)
        if img_size > 32:
            val_set = Cashed(val_set, img_size, channels=3)
            val_loader = DataLoader(val_set, batch_size=batch_size, num_workers=workers,
                                    pin_memory=pin_memory)
            for _ in val_loader:
                pass
            val_loader.dataset.set_use_cache(True)
            val_loader.dataset.pin_memory()
            loader_list.append(val_loader)
        else:
            val_set = Memory(val_set, img_size=img_size, channels=3)
            for _ in val_set:
                pass
            val_set.set_use_cache(True)
            val_set.pin_memory()
            loader_list.append(val_set)

    if 'test' in splits:
        test_set = ImageFolder(test_dir, val_transform)
        test_set = Memory(test_set, img_size=img_size, channels=3)
        for _ in test_set:
            pass
        test_set.set_use_cache(True)
        test_set.pin_memory()
        loader_list.append(test_set)

    if len(loader_list) == 1:
        return loader_list[0]
    return loader_list
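
A usage sketch (the root path is a placeholder; note that for img_size <= 32 the 'val' and 'test' entries are returned as cached in-memory Memory datasets rather than DataLoaders):

train_loader, val_data, test_data = gtsrb(root='/data/gtsrb',  # placeholder path
                                          img_size=32,
                                          splits=('train', 'val', 'test'))
images, labels = next(iter(train_loader))  # class-balanced via WeightedRandomSampler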
Example #12
        self.global_avg_pooling = nn.AdaptiveAvgPool2d((1, 1))  # 128x1x1
        self.fc_layer = nn.Linear(128, 10)  # Bx128 -> Bx10

    def forward(self, x):
        x = F.relu(self.layer1(x))
        x = F.relu(self.layer2(x))
        x = F.relu(self.layer3(x))

        x = self.global_avg_pooling(x)
        x = x.squeeze()  # Bx128x1x1 -> Bx128
        x = self.fc_layer(x)
        return x


transforms = Compose([ToTensor(),  # -> [0,1]
                      Normalize(mean=[0.5], std=[0.5])])  # -> [-1,1]

dataset = MNIST(root='.', download=True, transform=transforms, train=True)

data_loader = DataLoader(dataset=dataset, batch_size=64, shuffle=True)

model = CNN()

criterion = nn.CrossEntropyLoss()  # loss function

optim = torch.optim.Adam(model.parameters(), lr=0.01)
list_loss = list()

for epoch in range(10):
    for input, label in tqdm(data_loader):
        optim.zero_grad()
        results = model(input)
        loss = criterion(results, label)
        loss.backward()
        optim.step()
        list_loss.append(loss.item())
Example #13
	def __init__(self, file_path, imageSize):
		
		self.df = pd.read_csv(file_path, delim_whitespace=True, header=None)
		self.transform = Compose([Resize(imageSize), RandomCrop(imageSize), ToTensor(), Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])
Example #14
	def __init__(self, data_dir, imageSize):
		self.data_files = os.listdir(data_dir)
		self.transform = Compose([Resize(imageSize), RandomCrop(imageSize), ToTensor(), Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])
		self.data_files.sort()
		self.root = data_dir
		self.imageSize = imageSize
Example #15
    tmpstr = tmpstr + ')'
    return tmpstr


LEARNING_RATE = 0.01
NEPOCHS = 30
BATCH_SIZE = 32
HIDDEN_SIZE = 32
NUM_LAYERS = 1
inputs = torch.from_numpy(np.abs(data)).type(FloatTensor)
targets = Variable(torch.from_numpy(targets_onehot).type(LongTensor))

mean = [torch.mean(inputs)]  # [-0.5329]
std = [torch.std(inputs)]  # [1302.4]# torch.std(inputs)
norm = Normalize(mean, std)
inputs = norm(inputs.permute(2, 0, 1)).permute(1, 2, 0)

data_shape = inputs.shape  # inputs.size()
rand_seed = 1729
train, val, test = get_split_ind(rand_seed, data_shape, split_frac)
train_data = inputs[train, :, :]
val_data = inputs[val, :, :]
test_data = inputs[test, :, :]
train_label = targets[train, :]
val_label = targets[val, :]
test_label = targets[test, :]
# variable->np error as variable stores history of the object and np has no provision
# variable.data-> tensor->.numpy() gives array this can only be done on CPU so use .cpu()
train_data_shape = train_data.size()
# print np.histogram(train_label.data.cpu().numpy(), class_num)[0], np.histogram(test_label.data.cpu().numpy(), class_num)[0], np.unique(train_label.data.cpu().numpy())
Example #16
def create_split_loaders(batch_size,
                         p_val=0.1, p_test=0.2, shuffle=True,
                         show_sample=False, extras={}):
    """ Creates the DataLoader objects for the training, validation, and test sets.

    Params:
    -------
    - batch_size: (int) mini-batch size to load at a time
    - p_val: (float) Percent (as decimal) of dataset to use for validation
    - p_test: (float) Percent (as decimal) of the dataset to split for testing
    - shuffle: (bool) Indicate whether to shuffle the dataset before splitting
    - show_sample: (bool) Plot a mini-example as a grid of the dataset
    - extras: (dict)
        If CUDA/GPU computing is supported, contains:
        - num_workers: (int) Number of subprocesses to use while loading the dataset
        - pin_memory: (bool) For use with CUDA - copy tensors into pinned memory
                  (set to True if using a GPU)
        Otherwise, extras is an empty dict.

    Returns:
    --------
    - train_loader: (DataLoader) The iterator for the training set
    - val_loader: (DataLoader) The iterator for the validation set
    - test_loader: (DataLoader) The iterator for the test set
    """


    tf = Compose([
            Resize((416,416)),
#            transforms.RandomHorizontalFlip(),
#            transforms.RandomVerticalFlip(),
            ToTensor(),
            Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

    dataset = PascalVOC2012Dataset(mode='trainval', transforms=tf)

    # Dimensions and indices of training set
    dataset_size = dataset.__len__()
    all_indices = list(range(dataset_size))

    # Shuffle dataset before dividing into training & test sets
    if shuffle:
        np.random.seed(15)
        np.random.shuffle(all_indices)

    # Create the validation split from the full dataset
    val_split = int(np.floor(p_val * dataset_size))
    train_ind, val_ind = all_indices[val_split:], all_indices[:val_split]

    # Separate a test split from the training dataset
    test_split = int(np.floor(p_test * len(train_ind)))
    train_ind, test_ind = train_ind[test_split:], train_ind[:test_split]

    # Use the SubsetRandomSampler as the iterator for each subset
    sample_train = SubsetRandomSampler(train_ind)
    sample_test = SubsetRandomSampler(test_ind)
    sample_val = SubsetRandomSampler(val_ind)

    num_workers = 32
    pin_memory = False

    # If CUDA is available
    if extras:
        num_workers = extras["num_workers"]
        pin_memory = extras["pin_memory"]

    # Define the training, test, & validation DataLoaders
    train_loader = DataLoader(dataset, batch_size=batch_size,
                              sampler=sample_train, num_workers=num_workers,
                              pin_memory=pin_memory)

    ## set num_workers to 0
    test_loader = DataLoader(dataset, batch_size=batch_size,
                             sampler=sample_test, num_workers=0,
                             pin_memory=pin_memory)

    val_loader = DataLoader(dataset, batch_size=batch_size,
                            sampler=sample_val, num_workers=num_workers,
                            pin_memory=pin_memory)

    # Return the training, validation, test DataLoader objects
    return train_loader, val_loader, test_loader
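
A usage sketch (the extras dict follows the docstring; the num_workers and pin_memory values are illustrative):

import torch

extras = {"num_workers": 4, "pin_memory": True} if torch.cuda.is_available() else {}
train_loader, val_loader, test_loader = create_split_loaders(
    batch_size=16, p_val=0.1, p_test=0.2, shuffle=True, extras=extras)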
Example #17
    def perception(self, fake_imgs, real_imgs):
        normalizer = Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        fake_imgs = normalizer(fake_imgs)
        real_imgs = normalizer(real_imgs)
        return F.l1_loss(self.loss_network(fake_imgs),
                         self.loss_network(real_imgs))
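
Recent torchvision versions apply Normalize to batched BxCxHxW tensors directly, which the method above relies on. A hedged call sketch (`gan` stands for the enclosing module instance and `self.loss_network` is assumed to be a frozen feature extractor, e.g. a truncated VGG):

import torch

fake = torch.rand(8, 3, 224, 224)  # generator output scaled to [0, 1]
real = torch.rand(8, 3, 224, 224)
loss = gan.perception(fake, real)  # L1 distance in feature space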
Example #18
def precompute_image_feat(mode="train"):

    batchsize = 3

    path = os.path.join("/media/datas1/dataset/clevr/CLEVR_v1.0/images/", mode)
    path_to_feat = "/media/datas2/precomputed/clevr_res101_np"
    path_to_feat = os.path.join(path_to_feat, mode)

    if not os.path.exists(path_to_feat):
        os.mkdir(path_to_feat)

    model = models.resnet101(pretrained=True)

    modules = list(model.children())[:-3]
    model = nn.Sequential(*modules)

    model = model.eval()

    if USE_CUDA:
        model = model.cuda()

    mean = torch.FloatTensor([0.485, 0.456, 0.406])
    std = torch.FloatTensor([0.229, 0.224, 0.225])

    transform = Compose(
        [Resize([224, 224]),
         ToTensor(),
         Normalize(mean=mean, std=std)])

    path_list = []
    img_list = []

    for img_path in tqdm.tqdm(glob.glob(path + "/*.png")):

        # Compute path and name of the image
        img_name_raw = os.path.basename(img_path)
        img_name_raw_wo_ext, ext = os.path.splitext(img_name_raw)
        feat_path = os.path.join(path_to_feat, img_name_raw_wo_ext + ".npy")

        # if os.path.exists(feat_path):
        #     continue

        # Load and transform image
        img = Image.open(img_path, mode="r")
        img = img.convert('RGB')
        img = transform(img).unsqueeze(0)

        # Store in list images and his path
        path_list.append(feat_path)
        img_list.append(img)

        assert len(path_list) == len(img_list)

        # if you reach batch limit, compute forward pass and store
        if len(path_list) == batchsize:

            batch = torch.cat(img_list, dim=0).type(FloatTensor)
            # `volatile=True` Variables are deprecated; use torch.no_grad() instead
            with torch.no_grad():
                feats = model(batch).cpu()

            # save each image at the right path
            for num_in_batch, img_save_path in enumerate(path_list):
                feat = feats[num_in_batch].numpy()
                np.save(img_save_path, feat)

            # clean after mess to redo
            path_list = []
            img_list = []

    # Because you can't exactly fall on good size of batches
    if len(path_list) > 0:
        assert len(path_list) == len(img_list)

        batch = torch.cat(img_list, dim=0).type(FloatTensor)
        with torch.no_grad():
            feats = model(batch).cpu()

        # save each image at the right path
        for num_in_batch, img_save_path in enumerate(path_list):
            feat = feats[num_in_batch].numpy()
            np.save(img_save_path, feat)
Example #19
import torch
import torchvision
from torchvision.transforms import ToTensor, Normalize, Compose
from torchvision.datasets import MNIST

torch.manual_seed(42)

mnist = MNIST(root='../MNIST_Logistic_Regression/data/',
              train=True,
              download=True,
              transform=Compose(
                  [ToTensor(),
                   Normalize(mean=(0.5, ), std=(0.5, ))]))


def denorm(x):
    out = (x + 1) / 2
    return out.clamp(0, 1)


from torch.utils.data import DataLoader

batch_size = 100
data_loader = DataLoader(mnist, batch_size, shuffle=True)

# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

image_size = 784
hidden_size = 256
Example #20
def normalize(stats: Tuple = IMAGENET_STATS) -> Normalize:
    return Normalize(*stats)
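
A small usage sketch (IMAGENET_STATS is assumed to be the usual (mean, std) pair defined elsewhere in the module):

import torch

IMAGENET_STATS = ((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))  # assumed definition

tfm = normalize()                   # -> Normalize(mean, std)
out = tfm(torch.rand(3, 224, 224))  # per-channel (x - mean) / std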
Example #21
def main():
    # See all possible arguments in src/transformers/training_args.py
    # or by passing the --help flag to this script.
    # We now keep distinct sets of args, for a cleaner separation of concerns.

    parser = HfArgumentParser(
        (ModelArguments, DataTrainingArguments, CustomTrainingArguments))
    if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
        # If we pass only one argument to the script and it's the path to a json file,
        # let's parse it to get our arguments.
        model_args, data_args, training_args = parser.parse_json_file(
            json_file=os.path.abspath(sys.argv[1]))
    else:
        model_args, data_args, training_args = parser.parse_args_into_dataclasses(
        )

    # Setup logging
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        handlers=[logging.StreamHandler(sys.stdout)],
    )

    log_level = training_args.get_process_log_level()
    logger.setLevel(log_level)
    transformers.utils.logging.set_verbosity(log_level)
    transformers.utils.logging.enable_default_handler()
    transformers.utils.logging.enable_explicit_format()

    # Log on each process the small summary:
    logger.warning(
        f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}, "
        +
        f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}"
    )
    logger.info(f"Training/evaluation parameters {training_args}")

    # Detecting last checkpoint.
    last_checkpoint = None
    if os.path.isdir(
            training_args.output_dir
    ) and training_args.do_train and not training_args.overwrite_output_dir:
        last_checkpoint = get_last_checkpoint(training_args.output_dir)
        if last_checkpoint is None and len(os.listdir(
                training_args.output_dir)) > 0:
            raise ValueError(
                f"Output directory ({training_args.output_dir}) already exists and is not empty. "
                "Use --overwrite_output_dir to overcome.")
        elif last_checkpoint is not None and training_args.resume_from_checkpoint is None:
            logger.info(
                f"Checkpoint detected, resuming training at {last_checkpoint}. To avoid this behavior, change "
                "the `--output_dir` or add `--overwrite_output_dir` to train from scratch."
            )

    # Initialize our dataset.
    ds = load_dataset(
        data_args.dataset_name,
        data_args.dataset_config_name,
        data_files=data_args.data_files,
        cache_dir=model_args.cache_dir,
    )

    # If we don't have a validation split, split off a percentage of train as validation.
    data_args.train_val_split = None if "validation" in ds.keys(
    ) else data_args.train_val_split
    if isinstance(data_args.train_val_split,
                  float) and data_args.train_val_split > 0.0:
        split = ds["train"].train_test_split(data_args.train_val_split)
        ds["train"] = split["train"]
        ds["validation"] = split["test"]

    # Load pretrained model and feature extractor
    #
    # Distributed training:
    # The .from_pretrained methods guarantee that only one local process can concurrently
    # download model & vocab.
    config_kwargs = {
        "cache_dir": model_args.cache_dir,
        "revision": model_args.model_revision,
        "use_auth_token": True if model_args.use_auth_token else None,
    }
    if model_args.config_name:
        config = ViTMAEConfig.from_pretrained(model_args.config_name,
                                              **config_kwargs)
    elif model_args.model_name_or_path:
        config = ViTMAEConfig.from_pretrained(model_args.model_name_or_path,
                                              **config_kwargs)
    else:
        config = ViTMAEConfig()
        logger.warning(
            "You are instantiating a new config instance from scratch.")
        if model_args.config_overrides is not None:
            logger.info(f"Overriding config: {model_args.config_overrides}")
            config.update_from_string(model_args.config_overrides)
            logger.info(f"New config: {config}")

    # adapt config
    config.update({
        "mask_ratio": model_args.mask_ratio,
        "norm_pix_loss": model_args.norm_pix_loss,
    })

    # create feature extractor
    if model_args.feature_extractor_name:
        feature_extractor = ViTFeatureExtractor.from_pretrained(
            model_args.feature_extractor_name, **config_kwargs)
    elif model_args.model_name_or_path:
        feature_extractor = ViTFeatureExtractor.from_pretrained(
            model_args.model_name_or_path, **config_kwargs)
    else:
        feature_extractor = ViTFeatureExtractor()

    # create model
    if model_args.model_name_or_path:
        model = ViTMAEForPreTraining.from_pretrained(
            model_args.model_name_or_path,
            from_tf=bool(".ckpt" in model_args.model_name_or_path),
            config=config,
            cache_dir=model_args.cache_dir,
            revision=model_args.model_revision,
            use_auth_token=True if model_args.use_auth_token else None,
        )
    else:
        logger.info("Training new model from scratch")
        model = ViTMAEForPreTraining(config)

    if training_args.do_train:
        column_names = ds["train"].column_names
    else:
        column_names = ds["validation"].column_names

    if data_args.image_column_name is not None:
        image_column_name = data_args.image_column_name
    elif "image" in column_names:
        image_column_name = "image"
    elif "img" in column_names:
        image_column_name = "img"
    else:
        image_column_name = column_names[0]

    # transformations as done in original MAE paper
    # source: https://github.com/facebookresearch/mae/blob/main/main_pretrain.py
    transforms = Compose([
        Lambda(lambda img: img.convert("RGB") if img.mode != "RGB" else img),
        RandomResizedCrop(feature_extractor.size,
                          scale=(0.2, 1.0),
                          interpolation=InterpolationMode.BICUBIC),
        RandomHorizontalFlip(),
        ToTensor(),
        Normalize(mean=feature_extractor.image_mean,
                  std=feature_extractor.image_std),
    ])

    def preprocess_images(examples):
        """Preprocess a batch of images by applying transforms."""

        examples["pixel_values"] = [
            transforms(image) for image in examples[image_column_name]
        ]
        return examples

    if training_args.do_train:
        if "train" not in ds:
            raise ValueError("--do_train requires a train dataset")
        if data_args.max_train_samples is not None:
            ds["train"] = ds["train"].shuffle(seed=training_args.seed).select(
                range(data_args.max_train_samples))
        # Set the training transforms
        ds["train"].set_transform(preprocess_images)

    if training_args.do_eval:
        if "validation" not in ds:
            raise ValueError("--do_eval requires a validation dataset")
        if data_args.max_eval_samples is not None:
            ds["validation"] = (ds["validation"].shuffle(
                seed=training_args.seed).select(
                    range(data_args.max_eval_samples)))
        # Set the validation transforms
        ds["validation"].set_transform(preprocess_images)

    # Compute absolute learning rate
    total_train_batch_size = (training_args.train_batch_size *
                              training_args.gradient_accumulation_steps *
                              training_args.world_size)
    if training_args.base_learning_rate is not None:
        training_args.learning_rate = training_args.base_learning_rate * total_train_batch_size / 256

    # Initialize our trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=ds["train"] if training_args.do_train else None,
        eval_dataset=ds["validation"] if training_args.do_eval else None,
        tokenizer=feature_extractor,
        data_collator=collate_fn,
    )

    # Training
    if training_args.do_train:
        checkpoint = None
        if training_args.resume_from_checkpoint is not None:
            checkpoint = training_args.resume_from_checkpoint
        elif last_checkpoint is not None:
            checkpoint = last_checkpoint
        train_result = trainer.train(resume_from_checkpoint=checkpoint)
        trainer.save_model()
        trainer.log_metrics("train", train_result.metrics)
        trainer.save_metrics("train", train_result.metrics)
        trainer.save_state()

    # Evaluation
    if training_args.do_eval:
        metrics = trainer.evaluate()
        trainer.log_metrics("eval", metrics)
        trainer.save_metrics("eval", metrics)

    # Write model card and (optionally) push to hub
    kwargs = {
        "tasks": "masked-auto-encoding",
        "dataset": data_args.dataset_name,
        "tags": ["masked-auto-encoding"],
    }
    if training_args.push_to_hub:
        trainer.push_to_hub(**kwargs)
    else:
        trainer.create_model_card(**kwargs)
Example #22
    def __init__(self, obj_detect_weights_path, tracktor_config_path,
                 obj_detect_config_path):
        super(DeepMOT, self).__init__("DeepMOT")
        normalize_mean = [0.485, 0.456, 0.406]
        normalize_std = [0.229, 0.224, 0.225]
        self.transforms = Compose(
            [ToTensor(), Normalize(normalize_mean, normalize_std)])

        with open(tracktor_config_path) as config_file:
            tracktor = yaml.safe_load(config_file)["tracktor"]

        # set all seeds
        torch.manual_seed(tracktor["seed"])
        torch.cuda.manual_seed(tracktor["seed"])
        np.random.seed(tracktor["seed"])
        torch.backends.cudnn.deterministic = True

        ##########################
        # Initialize the modules #
        ##########################

        # object detection
        if tracktor["network"].startswith("fpn"):
            # FPN
            from src.tracktor.fpn import FPN
            from src.fpn.fpn.model.utils import config

            config.cfg.TRAIN.USE_FLIPPED = False
            config.cfg.CUDA = True
            checkpoint = torch.load(obj_detect_weights_path)

            if "pooling_mode" in checkpoint.keys():
                config.cfg.POOLING_MODE = checkpoint["pooling_mode"]
            else:
                config.cfg.POOLING_MODE = "align"

            set_cfgs = [
                "ANCHOR_SCALES", "[4, 8, 16, 32]", "ANCHOR_RATIOS", "[0.5,1,2]"
            ]
            config.cfg_from_file(obj_detect_config_path)
            config.cfg_from_list(set_cfgs)

            if "fpn_1_12.pth" in obj_detect_weights_path:
                classes = (
                    "__background__",
                    "aeroplane",
                    "bicycle",
                    "bird",
                    "boat",
                    "bottle",
                    "bus",
                    "car",
                    "cat",
                    "chair",
                    "cow",
                    "diningtable",
                    "dog",
                    "horse",
                    "motorbike",
                    "person",
                    "pottedplant",
                    "sheep",
                    "sofa",
                    "train",
                    "tvmonitor",
                )
            else:
                classes = ("__background__", "pedestrian")

            obj_detect = FPN(classes, 101, pretrained=False)
            obj_detect.create_architecture()
            if "model" in checkpoint.keys():

                model_dcit = obj_detect.state_dict()
                model_dcit.update(checkpoint["model"])
                obj_detect.load_state_dict(model_dcit)

                # obj_detect.load_state_dict(checkpoint['model'])

                # obj_detect.load_state_dict(checkpoint['model'])
            else:
                # pick the reid branch
                model_dcit = obj_detect.state_dict()
                model_dcit.update(checkpoint)
                obj_detect.load_state_dict(model_dcit)

        else:
            raise NotImplementedError(
                f"Object detector type not known: {tracktor['network']}")

        obj_detect.eval()
        obj_detect.cuda()

        # tracktor
        self.tracker = Tracker(obj_detect, None, tracktor["tracker"])
Example #23
wide = Wide(np.unique(X_wide).shape[0], 1)
deeptabular = TabMlp(
    mlp_hidden_dims=[32, 16],
    mlp_dropout=[0.5, 0.5],
    column_idx={k: v
                for v, k in enumerate(colnames)},
    embed_input=embed_input,
    continuous_cols=colnames[-5:],
)
deeptext = DeepText(vocab_size=vocab_size, embed_dim=32, padding_idx=0)
deepimage = DeepImage(pretrained=True)

# transforms
mean = [0.406, 0.456, 0.485]  # BGR
std = [0.225, 0.224, 0.229]  # BGR
transforms1 = [ToTensor, Normalize(mean=mean, std=std)]
transforms2 = [Normalize(mean=mean, std=std)]

deephead_ds = nn.Sequential(nn.Linear(16, 8), nn.Linear(8, 4))
deephead_dt = nn.Sequential(nn.Linear(64, 8), nn.Linear(8, 4))
deephead_di = nn.Sequential(nn.Linear(512, 8), nn.Linear(8, 4))

# #############################################################################
# Test that runs many possible scenarios of data inputs I can think off.
# Surely users will input something unexpected
# #############################################################################


@pytest.mark.parametrize(
    "X_wide, X_tab, X_text, X_img, X_train, X_val, target, val_split, transforms",
    [
Example #24
                if not buffer:
                    break

                output.write(buffer)
                loop.update(len(buffer))

    with open(download_target, "rb") as f:
        if hashlib.sha256(f.read()).hexdigest() != expected_sha256:
            raise RuntimeError(
                "Model has been downloaded but the SHA256 checksum does not match"
            )

    return download_target


normalize_image = Normalize((0.48145466, 0.4578275, 0.40821073),
                            (0.26862954, 0.26130258, 0.27577711))


def load(device=("cuda" if torch.cuda.is_available() else "cpu")):
    model_path = _download(MODEL_PATH)
    model = torch.jit.load(model_path, map_location=device).eval()
    n_px = model.input_resolution.item()

    transform = Compose([
        Resize(n_px, interpolation=Image.BICUBIC),
        CenterCrop(n_px),
        lambda image: image.convert("RGB"),
        ToTensor(),
        normalize_image,
    ])

    return model, transform
Example #25
except BaseException as e:
    print(
        "The visdom experienced an exception while running: {}\n"
        "The demo displays up-to-date functionality with the GitHub version, "
        "which may not yet be pushed to pip. Please upgrade using "
        "`pip install -e .` or `easy_install .`\n"
        "If this does not resolve the problem, please open an issue on "
        "our GitHub.".format(repr(e)))
#===============================================================================
# Model Setup
#===============================================================================

cuda_predicate = torch.cuda.is_available()

criterion = nn.CrossEntropyLoss()
transformation = Compose([ToTensor(), Normalize(mean=[0.2916], std=[0.2589])])

if cuda_predicate:
    print("USING GPU")
    model = PETmodel().cuda()
else:
    print("USING CPU")
    model = PETmodel()

optimizer = optim.Adam(model.parameters(), lr=0.01)

test_data = PETDataset(
    'D:/ML_FPGA_compton_PET/image_compton_small_module_1mm_pitch',
    transform=transformation,
    test=True,
Example #26
def get_train_eval_loaders(path, batch_size=256):
    """Setup the dataflow:
        - load CIFAR100 train and test datasets
        - setup train/test image transforms
            - horizontally flipped randomly and augmented using cutout.
            - each mini-batch contained 256 examples
        - setup train/test data loaders

    Returns:
        train_loader, test_loader, eval_train_loader
    """
    train_transform = Compose([
        Pad(4),
        RandomCrop(32),
        RandomHorizontalFlip(),
        ToTensor(),
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        RandomErasing(),
    ])

    test_transform = Compose([
        ToTensor(),
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    train_dataset = CIFAR100(root=path,
                             train=True,
                             transform=train_transform,
                             download=True)
    test_dataset = CIFAR100(root=path,
                            train=False,
                            transform=test_transform,
                            download=False)

    train_eval_indices = [
        random.randint(0, len(train_dataset) - 1)
        for _ in range(len(test_dataset))
    ]
    train_eval_dataset = Subset(train_dataset, train_eval_indices)

    train_loader = DataLoader(train_dataset,
                              batch_size=batch_size,
                              num_workers=12,
                              shuffle=True,
                              drop_last=True,
                              pin_memory=True)

    test_loader = DataLoader(test_dataset,
                             batch_size=batch_size,
                             num_workers=12,
                             shuffle=False,
                             drop_last=False,
                             pin_memory=True)

    eval_train_loader = DataLoader(train_eval_dataset,
                                   batch_size=batch_size,
                                   num_workers=12,
                                   shuffle=False,
                                   drop_last=False,
                                   pin_memory=True)

    return train_loader, test_loader, eval_train_loader
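
A usage sketch (CIFAR100 is downloaded into `path` on first use; eval_train_loader iterates a test-set-sized random subset of the training data):

train_loader, test_loader, eval_train_loader = get_train_eval_loaders(
    path='./data', batch_size=256)
images, labels = next(iter(train_loader))  # 256x3x32x32 after pad/crop/flip/erase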
Example #27
    parser_train.add_argument('--exp', default='default')

    return parser.parse_args()


NUM_CHANNELS = 3
NUM_CLASSES = 2

color_transform = Colorize()
image_transform = ToPILImage()

input_transform = Compose([
    Resize(256),  # `Scale` was renamed to `Resize` in torchvision
    CenterCrop(256),
    ToTensor(),
    Normalize([.485, .456, .406], [.229, .224, .225]),
])

target_transform = Compose(
    [Resize(256), CenterCrop(256),
     ToLabel(), Relabel(255, 1)])


def get_model():
    Net = FCN8
    model = Net(NUM_CLASSES, './vgg_16.pth')
    return model


def train(opt, model, use_cuda):
    model.train()
Example #28
    DEVICE = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu:0')
    DIR_ANALYSIS = './SGAN/checkpoints/Analysis'
    DIR_IMAGE = './SGAN/checkpoints/Image/Training'
    DIR_MODEL = './SGAN/checkpoints/Model'
    EPOCHS = 25
    ITER_DISPLAY = 100
    ITER_REPORT = 10
    LATENT_DIM = 100
    LR = 2e-4
    N_D_STEP = 1

    os.makedirs(DIR_ANALYSIS, exist_ok=True)
    os.makedirs(DIR_IMAGE, exist_ok=True)
    os.makedirs(DIR_MODEL, exist_ok=True)

    transforms = Compose([ToTensor(), Normalize(mean=[0.5], std=[0.5])])
    dataset = MNIST(root='./datasets',
                    train=True,
                    transform=transforms,
                    download=True)
    data_loader = DataLoader(dataset=dataset,
                             batch_size=BATCH_SIZE,
                             shuffle=True,
                             num_workers=0)

    D = Discriminator(cnn=CNN).apply(weights_init).to(DEVICE)
    G = Generator(cnn=CNN).apply(weights_init).to(DEVICE)
    print(D, G)

    CELoss = nn.CrossEntropyLoss()
    BCELoss = nn.BCELoss()
Example #29
def main():
    # See all possible arguments in src/transformers/training_args.py
    # or by passing the --help flag to this script.
    # We now keep distinct sets of args, for a cleaner separation of concerns.

    parser = HfArgumentParser((ModelArguments, DataTrainingArguments, TrainingArguments))
    if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
        # If we pass only one argument to the script and it's the path to a json file,
        # let's parse it to get our arguments.
        model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1]))
    else:
        model_args, data_args, training_args = parser.parse_args_into_dataclasses()

    # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
    # information sent is the one passed as arguments along with your Python/PyTorch versions.
    send_example_telemetry("run_mim", model_args, data_args)

    # Setup logging
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        handlers=[logging.StreamHandler(sys.stdout)],
    )

    log_level = training_args.get_process_log_level()
    logger.setLevel(log_level)
    transformers.utils.logging.set_verbosity(log_level)
    transformers.utils.logging.enable_default_handler()
    transformers.utils.logging.enable_explicit_format()

    # Log on each process the small summary:
    logger.warning(
        f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}, "
        + f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}"
    )
    logger.info(f"Training/evaluation parameters {training_args}")

    # Detecting last checkpoint.
    last_checkpoint = None
    if os.path.isdir(training_args.output_dir) and training_args.do_train and not training_args.overwrite_output_dir:
        last_checkpoint = get_last_checkpoint(training_args.output_dir)
        if last_checkpoint is None and len(os.listdir(training_args.output_dir)) > 0:
            raise ValueError(
                f"Output directory ({training_args.output_dir}) already exists and is not empty. "
                "Use --overwrite_output_dir to overcome."
            )
        elif last_checkpoint is not None and training_args.resume_from_checkpoint is None:
            logger.info(
                f"Checkpoint detected, resuming training at {last_checkpoint}. To avoid this behavior, change "
                "the `--output_dir` or add `--overwrite_output_dir` to train from scratch."
            )

    # Initialize our dataset.
    ds = load_dataset(
        data_args.dataset_name,
        data_args.dataset_config_name,
        data_files=data_args.data_files,
        cache_dir=model_args.cache_dir,
        use_auth_token=True if model_args.use_auth_token else None,
    )

    # If we don't have a validation split, split off a percentage of train as validation.
    data_args.train_val_split = None if "validation" in ds.keys() else data_args.train_val_split
    if isinstance(data_args.train_val_split, float) and data_args.train_val_split > 0.0:
        split = ds["train"].train_test_split(data_args.train_val_split)
        ds["train"] = split["train"]
        ds["validation"] = split["test"]

    # Create config
    # Distributed training:
    # The .from_pretrained methods guarantee that only one local process can concurrently
    # download model & vocab.
    config_kwargs = {
        "cache_dir": model_args.cache_dir,
        "revision": model_args.model_revision,
        "use_auth_token": True if model_args.use_auth_token else None,
    }
    if model_args.config_name_or_path:
        config = AutoConfig.from_pretrained(model_args.config_name_or_path, **config_kwargs)
    elif model_args.model_name_or_path:
        config = AutoConfig.from_pretrained(model_args.model_name_or_path, **config_kwargs)
    else:
        config = CONFIG_MAPPING[model_args.model_type]()
        logger.warning("You are instantiating a new config instance from scratch.")
        if model_args.config_overrides is not None:
            logger.info(f"Overriding config: {model_args.config_overrides}")
            config.update_from_string(model_args.config_overrides)
            logger.info(f"New config: {config}")

    # make sure the decoder_type is "simmim" (only relevant for BEiT)
    if hasattr(config, "decoder_type"):
        config.decoder_type = "simmim"

    # adapt config
    model_args.image_size = model_args.image_size if model_args.image_size is not None else config.image_size
    model_args.patch_size = model_args.patch_size if model_args.patch_size is not None else config.patch_size
    model_args.encoder_stride = (
        model_args.encoder_stride if model_args.encoder_stride is not None else config.encoder_stride
    )

    config.update(
        {
            "image_size": model_args.image_size,
            "patch_size": model_args.patch_size,
            "encoder_stride": model_args.encoder_stride,
        }
    )

    # create feature extractor
    if model_args.feature_extractor_name:
        feature_extractor = AutoFeatureExtractor.from_pretrained(model_args.feature_extractor_name, **config_kwargs)
    elif model_args.model_name_or_path:
        feature_extractor = AutoFeatureExtractor.from_pretrained(model_args.model_name_or_path, **config_kwargs)
    else:
        FEATURE_EXTRACTOR_TYPES = {
            conf.model_type: feature_extractor_class
            for conf, feature_extractor_class in FEATURE_EXTRACTOR_MAPPING.items()
        }
        feature_extractor = FEATURE_EXTRACTOR_TYPES[model_args.model_type]()

    # create model
    if model_args.model_name_or_path:
        model = AutoModelForMaskedImageModeling.from_pretrained(
            model_args.model_name_or_path,
            from_tf=bool(".ckpt" in model_args.model_name_or_path),
            config=config,
            cache_dir=model_args.cache_dir,
            revision=model_args.model_revision,
            use_auth_token=True if model_args.use_auth_token else None,
        )
    else:
        logger.info("Training new model from scratch")
        model = AutoModelForMaskedImageModeling.from_config(config)

    if training_args.do_train:
        column_names = ds["train"].column_names
    else:
        column_names = ds["validation"].column_names

    if data_args.image_column_name is not None:
        image_column_name = data_args.image_column_name
    elif "image" in column_names:
        image_column_name = "image"
    elif "img" in column_names:
        image_column_name = "img"
    else:
        image_column_name = column_names[0]

    # transformations as done in original SimMIM paper
    # source: https://github.com/microsoft/SimMIM/blob/main/data/data_simmim.py
    transforms = Compose(
        [
            Lambda(lambda img: img.convert("RGB") if img.mode != "RGB" else img),
            RandomResizedCrop(model_args.image_size, scale=(0.67, 1.0), ratio=(3.0 / 4.0, 4.0 / 3.0)),
            RandomHorizontalFlip(),
            ToTensor(),
            Normalize(mean=feature_extractor.image_mean, std=feature_extractor.image_std),
        ]
    )

    # create mask generator
    mask_generator = MaskGenerator(
        input_size=model_args.image_size,
        mask_patch_size=data_args.mask_patch_size,
        model_patch_size=model_args.patch_size,
        mask_ratio=data_args.mask_ratio,
    )

    def preprocess_images(examples):
        """Preprocess a batch of images by applying transforms + creating a corresponding mask, indicating
        which patches to mask."""

        examples["pixel_values"] = [transforms(image) for image in examples[image_column_name]]
        examples["mask"] = [mask_generator() for i in range(len(examples[image_column_name]))]

        return examples

    if training_args.do_train:
        if "train" not in ds:
            raise ValueError("--do_train requires a train dataset")
        if data_args.max_train_samples is not None:
            ds["train"] = ds["train"].shuffle(seed=training_args.seed).select(range(data_args.max_train_samples))
        # Set the training transforms
        ds["train"].set_transform(preprocess_images)

    if training_args.do_eval:
        if "validation" not in ds:
            raise ValueError("--do_eval requires a validation dataset")
        if data_args.max_eval_samples is not None:
            ds["validation"] = (
                ds["validation"].shuffle(seed=training_args.seed).select(range(data_args.max_eval_samples))
            )
        # Set the validation transforms
        ds["validation"].set_transform(preprocess_images)

    # Initialize our trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=ds["train"] if training_args.do_train else None,
        eval_dataset=ds["validation"] if training_args.do_eval else None,
        tokenizer=feature_extractor,
        data_collator=collate_fn,
    )

    # Training
    if training_args.do_train:
        checkpoint = None
        if training_args.resume_from_checkpoint is not None:
            checkpoint = training_args.resume_from_checkpoint
        elif last_checkpoint is not None:
            checkpoint = last_checkpoint
        train_result = trainer.train(resume_from_checkpoint=checkpoint)
        trainer.save_model()
        trainer.log_metrics("train", train_result.metrics)
        trainer.save_metrics("train", train_result.metrics)
        trainer.save_state()

    # Evaluation
    if training_args.do_eval:
        metrics = trainer.evaluate()
        trainer.log_metrics("eval", metrics)
        trainer.save_metrics("eval", metrics)

    # Write model card and (optionally) push to hub
    kwargs = {
        "finetuned_from": model_args.model_name_or_path,
        "tasks": "masked-image-modeling",
        "dataset": data_args.dataset_name,
        "tags": ["masked-image-modeling"],
    }
    if training_args.push_to_hub:
        trainer.push_to_hub(**kwargs)
    else:
        trainer.create_model_card(**kwargs)
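
# The fragment above calls two helpers, MaskGenerator and collate_fn, that are
# defined elsewhere in the script. The sketches below follow the SimMIM recipe
# referenced in the comments (https://github.com/microsoft/SimMIM); treat them
# as illustrative reconstructions rather than the exact original definitions.
import numpy as np
import torch


class MaskGenerator:
    """Randomly masks a fixed ratio of mask patches and returns a flat mask at
    model-patch resolution."""

    def __init__(self, input_size=192, mask_patch_size=32, model_patch_size=4, mask_ratio=0.6):
        if input_size % mask_patch_size != 0:
            raise ValueError("input size must be divisible by mask patch size")
        if mask_patch_size % model_patch_size != 0:
            raise ValueError("mask patch size must be divisible by model patch size")
        self.rand_size = input_size // mask_patch_size
        self.scale = mask_patch_size // model_patch_size
        self.token_count = self.rand_size ** 2
        self.mask_count = int(np.ceil(self.token_count * mask_ratio))

    def __call__(self):
        # Pick mask_count patch positions at random, then upsample the
        # patch-level mask to model-patch resolution.
        mask_idx = np.random.permutation(self.token_count)[: self.mask_count]
        mask = np.zeros(self.token_count, dtype=int)
        mask[mask_idx] = 1
        mask = mask.reshape((self.rand_size, self.rand_size))
        mask = mask.repeat(self.scale, axis=0).repeat(self.scale, axis=1)
        return torch.tensor(mask.flatten())


def collate_fn(examples):
    """Stacks transformed images and their masks into the batch format expected
    by AutoModelForMaskedImageModeling (pixel_values + bool_masked_pos)."""
    pixel_values = torch.stack([example["pixel_values"] for example in examples])
    mask = torch.stack([example["mask"] for example in examples])
    return {"pixel_values": pixel_values, "bool_masked_pos": mask}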
Example No. 30
def __init__(self, path):
    self.transform = ToTensor()
    self.norm = Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    self.imgs = [imread(x) for x in glob(os.path.join(path, '*.png'))]
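
# Continuation sketch: the fragment ends at __init__. The companion methods a
# torch-style dataset would need are assumed below; they are not in the source.
def __len__(self):
    return len(self.imgs)

def __getitem__(self, idx):
    img = self.transform(self.imgs[idx])  # HWC uint8 array -> CHW float tensor in [0, 1]
    return self.norm(img)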