예제 #1
0
def main(args):
    """Predict masks for a tile dataset on every visible GPU, then build a web UI.

    Steps performed:
      1. Load and validate the configuration (channels and classes).
      2. Require CUDA and NCCL; one worker process is spawned per CUDA device.
      3. Derive default batch size / worker count when not given on the CLI.
      4. Read the checkpoint on CPU to discover which loader class to use,
         build the dataset in "predict" mode and spawn ``gpu_worker``.
      5. Remove the rendezvous lock file and, unless disabled, render the
         web UI over the dataset cover.

    Args:
        args: parsed CLI namespace. Attributes read here: ``config``, ``bs``,
            ``workers``, ``cover``, ``out``, ``checkpoint``, ``dataset``,
            ``metatiles``, ``keep_borders``, ``no_web_ui``,
            ``web_ui_template``, ``web_ui_base_url``. ``bs``, ``workers``,
            ``cover``, ``out`` and ``checkpoint`` are normalised in place.

    Raises:
        AssertionError: if CUDA or NCCL support is missing.
    """
    config = load_config(args.config)
    check_channels(config)
    check_classes(config)

    assert torch.cuda.is_available(
    ), "No GPU support found. Check CUDA and NVidia Driver install."
    assert torch.distributed.is_nccl_available(
    ), "No NCCL support found. Check your PyTorch install."

    world_size = torch.cuda.device_count()
    if args.bs is None:
        # os.cpu_count() may return None, and integer division may yield 0
        # when there are more GPUs than CPUs: always keep at least 1 tile/batch.
        args.bs = max(1, (os.cpu_count() or 1) // world_size)
    if args.workers is None:
        args.workers = args.bs

    palette, transparency = make_palette(
        [classe["color"] for classe in config["classes"]])
    args.cover = list(tiles_from_csv(os.path.expanduser(
        args.cover))) if args.cover else None

    args.out = os.path.expanduser(args.out)
    # Expanded once here so that "~/model.pth" works, both for this load and
    # for the spawned workers which receive the same `args` namespace.
    args.checkpoint = os.path.expanduser(args.checkpoint)
    log = Logs(os.path.join(args.out, "log"))

    # Loaded on CPU: only the metadata (nn, uuid, loader, shape_in) is used here.
    chkpt = torch.load(args.checkpoint, map_location=torch.device("cpu"))
    log.log("abd predict on {} GPUs, with {} workers/GPU and {} tiles/batch".
            format(world_size, args.workers, args.bs))
    log.log("Model {} - UUID: {}".format(chkpt["nn"], chkpt["uuid"]))
    log.log("---")
    loader = load_module("abd_model.loaders.{}".format(
        chkpt["loader"].lower()))

    # File used by the workers as their "file://" process group rendezvous.
    lock_file = os.path.abspath(os.path.join(args.out, str(uuid.uuid1())))

    dataset = getattr(loader, chkpt["loader"])(
        config,
        chkpt["shape_in"][1:3],
        args.dataset,
        args.cover,
        mode="predict",
        metatiles=args.metatiles,
        keep_borders=args.keep_borders,
    )

    try:
        mp.spawn(gpu_worker,
                 nprocs=world_size,
                 args=(world_size, lock_file, args, config, dataset, palette,
                       transparency))
    finally:
        # Do not leave the rendezvous file behind, even if a worker failed.
        if os.path.exists(lock_file):
            os.remove(lock_file)

    if not args.no_web_ui and dataset.cover:
        template = "leaflet.html" if not args.web_ui_template else args.web_ui_template
        base_url = args.web_ui_base_url if args.web_ui_base_url else "."
        web_ui(args.out, base_url, dataset.cover, dataset.cover, "png",
               template)
예제 #2
0
 def __init__(self, metrics, classes, config=None):
     """Prepare empty per-class metric accumulators and load metric modules.

     Args:
         metrics: iterable of metric names; each one is resolved to the
             module ``abd_model.metrics.<name>`` through ``load_module``.
         classes: iterable of classes; one accumulator dict is created
             per entry.
         config: optional configuration object, stored as is.
     """
     self.n = 0
     self.config = config
     self.classes = classes
     # One independent {metric name: []} mapping per class.
     self.metrics = [{name: [] for name in metrics} for _ in classes]
     self.modules = dict(
         (name, load_module("abd_model.metrics." + name)) for name in metrics)
예제 #3
0
    def __init__(self, shape_in, shape_out, encoder="resnet50", train_config=None):
        """Build a U-Net like encoder-decoder on top of a torchvision ResNet-family encoder.

        Args:
            shape_in: input shape; ``shape_in[0]`` is used as the number of input channels.
            shape_out: output shape; ``shape_out[0]`` is used as the number of classes.
            encoder: name of the ``torchvision.models`` constructor to use, one of
                resnet50, resnet101, resnet152, resnext50_32x4d, resnext101_32x8d,
                wide_resnet50_2, wide_resnet101_2.
            train_config: optional mapping; its ``"pretrained"`` entry, when present,
                is forwarded to the encoder constructor. Defaults to not pretrained.

        Raises:
            AssertionError: on an unsupported encoder, or empty channels/classes.
        """
        super().__init__()

        doc = "U-Net like encoder-decoder architecture with a ResNet, ResNext or WideResNet encoder.\n\n"
        doc += " - https://arxiv.org/abs/1505.04597 - U-Net: Convolutional Networks for Biomedical Image Segmentation\n"

        if encoder in ["resnet50", "resnet101", "resnet152"]:
            doc += " - https://arxiv.org/abs/1512.03385 - Deep Residual Learning for Image Recognition\n"
        elif encoder in ["resnext50_32x4d", "resnext101_32x8d"]:
            doc += " - https://arxiv.org/pdf/1611.05431 - Aggregated Residual Transformations for DNN\n"
        elif encoder in ["wide_resnet50_2", "wide_resnet101_2"]:
            doc += " - https://arxiv.org/abs/1605.07146 - Wide Residual Networks\n"
        else:
            encoders = "resnet50, resnet101, resnet152, resnext50_32x4d, resnext101_32x8d, wide_resnet50_2, wide_resnet101_2"
            # Raised explicitly rather than `assert False`, so the check is not
            # stripped under `python -O` (the decoder below hard-codes the
            # channel widths of these encoders). Same exception type as before.
            raise AssertionError("Albunet, expects as encoder: " + encoders)

        self.version = 2
        self.doc_string = doc

        num_filters = 32
        num_channels = shape_in[0]
        num_classes = shape_out[0]

        assert num_channels, "Empty Channels"
        assert num_classes, "Empty Classes"

        try:
            pretrained = train_config["pretrained"]
        except (TypeError, LookupError):
            # train_config is None (not subscriptable) or has no "pretrained" entry.
            pretrained = False

        models = load_module("torchvision.models")
        self.encoder = getattr(models, encoder)(pretrained=pretrained)
        # https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py

        if num_channels != 3:
            # The stock first convolution expects 3 channels: replace it by one
            # with `num_channels` inputs, Kaiming-initialised.
            weights = nn.init.kaiming_normal_(torch.zeros((64, num_channels, 7, 7)), mode="fan_out", nonlinearity="relu")
            if pretrained:
                # Reuse the pretrained filters for the channels both layouts share.
                for c in range(min(num_channels, 3)):
                    weights.data[:, c, :, :] = self.encoder.conv1.weight.data[:, c, :, :]
            self.encoder.conv1 = nn.Conv2d(num_channels, 64, kernel_size=7, stride=2, padding=3, bias=False)
            self.encoder.conv1.weight = nn.Parameter(weights)

        self.center = DecoderBlock(2048, num_filters * 8)

        self.dec0 = DecoderBlock(2048 + num_filters * 8, num_filters * 8)
        self.dec1 = DecoderBlock(1024 + num_filters * 8, num_filters * 8)
        self.dec2 = DecoderBlock(512 + num_filters * 8, num_filters * 2)
        self.dec3 = DecoderBlock(256 + num_filters * 2, num_filters * 2 * 2)
        self.dec4 = DecoderBlock(num_filters * 2 * 2, num_filters)
        self.dec5 = ConvRelu(num_filters, num_filters)

        self.final = nn.Conv2d(num_filters, num_classes, kernel_size=1)
예제 #4
0
def main(args):
    """Extract features of a given type from an OSM .pbf file and save them.

    The handler class ``<type>Handler`` is looked up in the module
    ``abd_model.osm.<type lowercased>``, applied to ``args.pbf`` and asked to
    save its result to ``args.out``. The parent directory of ``args.out`` is
    created when needed.

    Args:
        args: parsed CLI namespace; reads ``type``, ``pbf`` and ``out``.

    Raises:
        SystemExit: when no module is available for ``args.type``.
    """
    try:
        module = load_module("abd_model.osm.{}".format(args.type.lower()))
    except Exception:
        # `Exception` rather than a bare except: Ctrl-C and SystemExit must
        # not be reported as an unavailable --type.
        sys.exit("ERROR: Unavailable --type {}".format(args.type))

    out_path = os.path.expanduser(args.out)
    out_dir = os.path.dirname(out_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    osmium_handler = getattr(module, "{}Handler".format(args.type))()

    print("abd extract {} from {} to {}".format(args.type, args.pbf, args.out), file=sys.stderr, flush=True)
    print("\nNOTICE: could take time. Be patient...\n", file=sys.stderr, flush=True)

    osmium_handler.apply_file(filename=os.path.expanduser(args.pbf), locations=True)
    osmium_handler.save(out_path)
def main(args):
    """Compare tile directories: side-by-side images, stacked images or a tile list.

    Behaviour driven by ``args``:

    - When both ``masks`` and ``labels`` are given, each tile's label and mask
      PNG are loaded and every metric requested through ``min``/``max`` is
      computed. A tile whose metric lies outside its [min, max] window is
      left out of the output; a tile whose metric computation raises is
      reported as skipped in the log (no log is kept in "list" mode).
    - ``mode == "side"``: the images of every ``images`` directory are pasted
      next to each other (below each other with ``vertical``) and written
      under ``out``.
    - ``mode == "stack"``: the images are averaged and written under ``out``.
    - ``mode == "list"``: the kept tiles and their metrics are written to the
      file ``out``, either as ``x,y,z`` lines followed by tab separated
      metrics, or as a GeoJSON FeatureCollection with ``geojson``.

    A web UI is rendered for "side" and "stack" modes unless ``no_web_ui``.

    Args:
        args: parsed CLI namespace. ``out``, ``min``, ``max`` and ``workers``
            are normalised in place.

    Raises:
        AssertionError: on inconsistent parameters or tile directories.
    """
    if not args.masks or not args.labels:
        assert args.mode != "list", "Parameters masks and labels are mandatories in list mode."
        assert not (
            args.min or args.max
        ), "Both --masks and --labels mandatory, for metric filtering."

    if args.min or args.max:
        config = load_config(args.config)

    args.out = os.path.expanduser(args.out)
    cover = list(tiles_from_csv(os.path.expanduser(
        args.cover))) if args.cover else None

    # --min/--max entries are (class title, metric name, value) triples,
    # re-keyed here by (class title, metric name).
    args_minmax = set()
    args.min = {(m[0], m[1]): m[2] for m in args.min} if args.min else dict()
    args.max = {(m[0], m[1]): m[2] for m in args.max} if args.max else dict()
    args_minmax.update(args.min.keys())
    args_minmax.update(args.max.keys())
    minmax = dict()
    for mm in args_minmax:
        mm_min = float(args.min[mm]) if mm in args.min else 0.0
        mm_max = float(args.max[mm]) if mm in args.max else 1.0
        assert mm_min < mm_max, "--min must be lower than --max, on {}".format(
            mm)
        class_ids = [
            c for c, classe in enumerate(config["classes"])
            if classe["title"] == mm[0]
        ]
        # Explicit message instead of a bare IndexError on an unknown title.
        assert class_ids, "Unknown class title {} in --min/--max".format(mm[0])
        minmax[mm] = {
            "min": mm_min,
            "max": mm_max,
            "class_id": class_ids[0],
            "module": load_module("abd_model.metrics." + mm[1]),
        }

    if not args.workers:
        args.workers = os.cpu_count()

    print("abd compare {} on CPU, with {} workers".format(
        args.mode, args.workers),
          file=sys.stderr,
          flush=True)

    # Without this guard `tiles` would be left unbound further down.
    assert args.images or (
        args.labels and args.masks
    ), "Nothing to compare: provide --images, or both --labels and --masks"

    if args.images:
        tiles = list(tiles_from_dir(args.images[0], cover=cover))
        assert len(tiles), "Empty images dir: {}".format(args.images[0])

        for image in args.images[1:]:
            assert sorted(tiles) == sorted(
                tiles_from_dir(image, cover=cover)), "Unconsistent images dirs"

    if args.labels and args.masks:
        tiles_masks = list(tiles_from_dir(args.masks, cover=cover))
        tiles_labels = list(tiles_from_dir(args.labels, cover=cover))
        if args.images:
            assert sorted(tiles) == sorted(tiles_masks) == sorted(
                tiles_labels), "Unconsistent images/label/mask directories"
        else:
            assert len(tiles_masks), "Empty masks dir: {}".format(args.masks)
            assert len(tiles_labels), "Empty labels dir: {}".format(
                args.labels)
            assert sorted(tiles_masks) == sorted(
                tiles_labels), "Label and Mask directories are not consistent"
            tiles = tiles_masks

    tiles_list = []
    tiles_compare = []
    progress = tqdm(total=len(tiles), ascii=True, unit="tile")
    log = False if args.mode == "list" else Logs(os.path.join(args.out, "log"))

    with futures.ThreadPoolExecutor(args.workers) as executor:

        def worker(tile):
            """Process one tile; return (ok, tile), ok being False on metric failure."""
            x, y, z = map(str, tile)

            if args.masks and args.labels:

                label = np.array(
                    Image.open(
                        os.path.join(args.labels, z, x, "{}.png".format(y))))
                mask = np.array(
                    Image.open(
                        os.path.join(args.masks, z, x, "{}.png".format(y))))

                assert label.shape == mask.shape, "Inconsistent tiles (size or dimensions)"

                metrics = dict()
                for mm in minmax:
                    try:
                        metrics[mm] = getattr(minmax[mm]["module"], "get")(
                            torch.as_tensor(label, device="cpu"),
                            torch.as_tensor(mask, device="cpu"),
                            minmax[mm]["class_id"],
                        )
                    except Exception:
                        # Best effort: the tile is reported as skipped by the caller.
                        progress.update()
                        return False, tile

                    # Out of the requested window: silently filter the tile out.
                    if not (minmax[mm]["min"] <= metrics[mm] <=
                            minmax[mm]["max"]):
                        progress.update()
                        return True, tile

            tiles_compare.append(tile)

            if args.mode == "side":
                nb_images = len(args.images)
                for i, root in enumerate(args.images):
                    img = tile_image_from_file(tile_from_xyz(root, x, y, z)[1],
                                               force_rgb=True)

                    if i == 0:
                        height, width = img.shape[0:2]
                        # The canvas grows along rows when vertical, along
                        # columns otherwise (also correct for non-square tiles).
                        if args.vertical:
                            side = np.zeros((height * nb_images, width, 3))
                        else:
                            side = np.zeros((height, width * nb_images, 3))
                    else:
                        assert (height, width) == img.shape[
                            0:2], "Unconsistent image size to compare"

                    if args.vertical:
                        side[i * height:(i + 1) * height, :, :] = img
                    else:
                        side[:, i * width:(i + 1) * width, :] = img

                tile_image_to_file(args.out, tile, np.uint8(side))

            elif args.mode == "stack":
                for i, root in enumerate(args.images):
                    tile_image = tile_image_from_file(tile_from_xyz(
                        root, x, y, z)[1],
                                                      force_rgb=True)

                    if i == 0:
                        image_shape = tile_image.shape[0:2]
                        stack = tile_image / len(args.images)
                    else:
                        assert image_shape == tile_image.shape[
                            0:2], "Unconsistent image size to compare"
                        stack = stack + (tile_image / len(args.images))

                tile_image_to_file(args.out, tile, np.uint8(stack))

            elif args.mode == "list":
                tiles_list.append([tile, metrics])

            progress.update()
            return True, tile

        for ok, tile in executor.map(worker, tiles):
            if not ok and log:
                log.log("Warning: skipping. {}".format(str(tile)))

    if args.mode == "list":
        # The `with` statement closes the file: no explicit close() needed.
        with open(args.out, mode="w") as out:

            if args.geojson:
                out.write('{"type":"FeatureCollection","features":[')
                first = True

            for tile, metrics in tiles_list:
                x, y, z = map(str, tile)

                if args.geojson:
                    # `prop` is deliberately left unclosed: its closing brace
                    # comes from the feature template below.
                    prop = '"properties":{{"x":{},"y":{},"z":{}'.format(
                        x, y, z)
                    for metric in metrics:
                        prop += ',"{}":{:.3f}'.format(metric, metrics[metric])
                    geom = '"geometry":{}'.format(
                        json.dumps(feature(tile, precision=6)["geometry"]))
                    out.write('{}{{"type":"Feature",{},{}}}}}'.format(
                        "," if not first else "", geom, prop))
                    first = False
                else:
                    out.write("{},{},{}".format(x, y, z))
                    for metric in metrics:
                        out.write("\t{:.3f}".format(metrics[metric]))
                    # Text mode already translates "\n" to the platform line
                    # ending; os.linesep would yield "\r\r\n" on Windows.
                    out.write("\n")

            if args.geojson:
                out.write("]}")

    base_url = args.web_ui_base_url if args.web_ui_base_url else "."

    if args.mode == "side" and not args.no_web_ui:
        template = "compare.html" if not args.web_ui_template else args.web_ui_template
        web_ui(args.out,
               base_url,
               tiles,
               tiles_compare,
               args.format,
               template,
               union_tiles=False)

    if args.mode == "stack" and not args.no_web_ui:
        template = "leaflet.html" if not args.web_ui_template else args.web_ui_template
        tiles = [tile for tile in tiles_from_dir(args.images[0])]
        web_ui(args.out, base_url, tiles, tiles_compare, args.format, template)
예제 #6
0
def main(args):
    """Evaluate a model on a dataset, on a single GPU, and print the setup.

    Normalises the CLI arguments (cover, classes weights, tiles weights, batch
    size, workers), validates the configuration, builds the dataset in "eval"
    mode with the loader named in the configuration, prints a summary of the
    input channels, output classes and model hyper-parameters, then spawns
    ``gpu_worker``.

    Args:
        args: parsed CLI namespace. ``cover``, ``classes_weights``,
            ``tiles_weights``, ``bs`` and ``workers`` are rewritten in place.
            ``classes_weights`` is either a comma separated list of floats or
            the literal "auto"; when unset, the weights of the config classes
            are used.

    Raises:
        AssertionError: on an invalid ``classes_weights`` value, missing
            CUDA/NCCL support, or an invalid/empty dataset.
    """
    config = load_config(args.config)
    args.cover = list(tiles_from_csv(os.path.expanduser(
        args.cover))) if args.cover else None
    if args.classes_weights:
        try:
            args.classes_weights = list(
                map(float, args.classes_weights.split(",")))
        except ValueError:
            # Not a list of floats: the only other accepted value is "auto".
            assert args.classes_weights == "auto", "invalid --classes_weights value"
            args.classes_weights = compute_classes_weights(
                args.dataset, config["classes"], args.cover, os.cpu_count())
    else:
        args.classes_weights = [
            classe["weight"] for classe in config["classes"]
        ]

    args.tiles_weights = ([(tile, weight) for tile, weight in tiles_from_csv(
        os.path.expanduser(args.tiles_weights), extra_columns=True)]
                          if args.tiles_weights else None)

    args.bs = args.bs if args.bs else config["train"]["bs"]
    check_classes(config)
    check_channels(config)
    check_model(config)

    assert torch.cuda.is_available(
    ), "No GPU support found. Check CUDA and NVidia Driver install."
    assert torch.distributed.is_nccl_available(
    ), "No NCCL support found. Check your PyTorch install."
    world_size = 1  # Hard Coded since eval MultiGPUs not yet implemented

    args.workers = min(args.bs if not args.workers else args.workers,
                       math.floor(os.cpu_count() / world_size))

    print("abd eval on 1 GPU, with {} workers, and {} tiles/batch".format(
        args.workers, args.bs))

    loader = load_module("abd_model.loaders.{}".format(
        config["model"]["loader"].lower()))

    assert os.path.isdir(os.path.expanduser(
        args.dataset)), "--dataset path is not a directory"
    dataset = getattr(loader,
                      config["model"]["loader"])(config, config["model"]["ts"],
                                                 args.dataset, args.cover,
                                                 args.tiles_weights, "eval")
    assert len(dataset), "Empty or Invalid --dataset content"
    shape_in = dataset.shape_in
    shape_out = dataset.shape_out
    print("DataSet Eval:            {}".format(args.dataset))

    print("\n--- Input tensor")
    num_channel = 1  # 1-based numerotation
    for channel in config["channels"]:
        for band in channel["bands"]:
            print("Channel {}:\t\t {} - (band:{})".format(
                num_channel, channel["name"], band))
            num_channel += 1

    print("\n--- Output Classes ---")
    for c, classe in enumerate(config["classes"]):
        print("Class {}:\t\t {} ({:.2f})".format(c, classe["title"],
                                                 args.classes_weights[c]))

    print("\n--- Model ---")
    for hp in config["model"]:
        print("{}{}".format(hp.ljust(25, " "), config["model"][hp]))

    # File used by the worker as its "file://" process group rendezvous.
    lock_file = os.path.abspath(os.path.join("/tmp", str(uuid.uuid1())))
    try:
        mp.spawn(gpu_worker,
                 nprocs=world_size,
                 args=(world_size, lock_file, dataset, shape_in, shape_out,
                       args, config))
    finally:
        # Do not leave the rendezvous file behind, even if the worker failed.
        if os.path.exists(lock_file):
            os.remove(lock_file)
예제 #7
0
def gpu_worker(rank, world_size, lock_file, dataset, shape_in, shape_out, args,
               config):
    """Evaluation worker: run the model over the dataset and print metrics.

    Joins the NCCL process group through ``lock_file``, builds the network
    named in ``config["model"]``, optionally restores ``args.checkpoint``,
    then feeds every batch to the network without gradients. Only rank 0
    accumulates metrics and prints, per class, the "μ" and "σ" entries
    returned by ``Metrics.get()``.

    Args:
        rank: index of this process, also used as CUDA device index.
        world_size: number of processes in the group.
        lock_file: path used as "file://" rendezvous for the process group.
        dataset: dataset handed to the DataLoader.
        shape_in, shape_out: shapes forwarded to the network constructor.
        args: CLI namespace; reads ``bs``, ``workers``, ``checkpoint`` and
            ``metrics`` (``metrics`` is filled from the config when empty).
        config: configuration mapping.
    """
    dist.init_process_group(backend="nccl",
                            init_method="file://" + lock_file,
                            world_size=world_size,
                            rank=rank)
    torch.cuda.set_device(rank)
    torch.manual_seed(0)

    loader = DataLoader(dataset,
                        batch_size=args.bs,
                        shuffle=False,
                        drop_last=True,
                        num_workers=args.workers)

    # Resolve the network class by name, then wrap it for distributed use.
    model_name = config["model"]["nn"]
    nn_module = load_module("abd_model.nn.{}".format(model_name.lower()))
    model_class = getattr(nn_module, model_name)
    model = model_class(shape_in, shape_out,
                        config["model"]["encoder"].lower(),
                        config["train"]).cuda(rank)
    model = DistributedDataParallel(model,
                                    device_ids=[rank],
                                    find_unused_parameters=True)

    is_main = rank == 0

    if args.checkpoint:
        checkpoint_path = os.path.expanduser(args.checkpoint)
        chkpt = torch.load(checkpoint_path,
                           map_location="cuda:{}".format(rank))
        assert model.module.version == chkpt[
            "model_version"], "Model Version mismatch"
        model.load_state_dict(chkpt["state_dict"])

        if is_main:
            print("\n--- Using Checkpoint ---")
            print("Path:\t\t {}".format(args.checkpoint))
            print("UUID:\t\t {}".format(chkpt["uuid"]))

    model.eval()
    with torch.no_grad():
        if not args.metrics:
            args.metrics = config["train"]["metrics"]
        metrics = Metrics(args.metrics, config["classes"], config=config)

        assert len(loader), "Empty or Inconsistent DataSet"
        if is_main:
            dataloader = tqdm(loader, desc="Eval", unit="Batch", ascii=True)
        else:
            dataloader = loader

        for images, masks, tiles, tiles_weights in dataloader:
            images = images.cuda(rank, non_blocking=True)
            masks = masks.cuda(rank, non_blocking=True)
            outputs = model(images)

            if not is_main:
                continue
            for mask, output in zip(masks, outputs):
                metrics.add(mask, output)

        if is_main:
            print("\n{}  μ\t   σ".format(" ".ljust(25, " ")))
            for c, classe in enumerate(config["classes"]):
                # Zero-weighted and transparent classes are not reported.
                if classe["weight"] == 0.0 or classe["color"] == "transparent":
                    continue
                for k, v in metrics.get()[c].items():
                    label = (classe["title"] + " " + k).ljust(25, " ")
                    print("{}{:.3f}\t {:.3f}".format(label, v["μ"], v["σ"]))

    dist.destroy_process_group()
예제 #8
0
def gpu_worker(rank, world_size, lock_file, dataset, shape_in, shape_out, args,
               config):
    """Training worker: train the configured network and save checkpoints.

    Joins the NCCL process group through ``lock_file``, builds the network,
    optimizer and loss named in ``config``, optionally restores
    ``args.checkpoint`` (and the optimizer/epoch with ``args.resume``), then
    runs ``do_epoch`` for each remaining epoch. Rank 0 logs the setup and
    writes ``checkpoint-<epoch>.pth`` under ``args.out`` on the last epoch and
    every ``args.saving`` epochs.

    Args:
        rank: index of this process, also used as CUDA device index.
        world_size: number of processes in the group.
        lock_file: path used as "file://" rendezvous for the process group.
        dataset: dataset handed to the sampler and DataLoader.
        shape_in, shape_out: shapes forwarded to the network constructor and
            stored in each checkpoint.
        args: CLI namespace; reads ``out``, ``workers``, ``checkpoint``,
            ``resume``, ``epochs``, ``saving`` and ``classes_weights``.
        config: configuration mapping.

    Raises:
        AssertionError: on a model version mismatch, or when resuming from a
            checkpoint that already reached ``args.epochs``.
    """
    log = Logs(os.path.join(args.out, "log")) if rank == 0 else None

    dist.init_process_group(backend="nccl",
                            init_method="file://" + lock_file,
                            world_size=world_size,
                            rank=rank)
    torch.cuda.set_device(rank)
    torch.manual_seed(0)

    bs = config["train"]["bs"]

    sampler = torch.utils.data.distributed.DistributedSampler(
        dataset, num_replicas=world_size, rank=rank)
    loader = DataLoader(dataset,
                        batch_size=bs,
                        shuffle=False,
                        drop_last=True,
                        num_workers=args.workers,
                        sampler=sampler)

    nn_module = load_module("abd_model.nn.{}".format(
        config["model"]["nn"].lower()))
    nn = getattr(nn_module,
                 config["model"]["nn"])(shape_in, shape_out,
                                        config["model"]["encoder"].lower(),
                                        config["train"]).cuda(rank)
    nn = DistributedDataParallel(nn,
                                 device_ids=[rank],
                                 find_unused_parameters=True)

    # Every optimizer entry but "name" is forwarded as a constructor keyword.
    optimizer_params = {
        key: value
        for key, value in config["train"]["optimizer"].items() if key != "name"
    }
    optimizer = getattr(torch.optim, config["train"]["optimizer"]["name"])(
        nn.parameters(), **optimizer_params)

    if rank == 0:
        log.log("\n--- Train ---")
        for hp in config["train"]:
            if hp == "da":
                da = config["train"]["da"]["name"]
                dap = config["train"]["da"]["p"]
                log.log("{}{} ({:.2f})".format("da".ljust(25, " "), da, dap))
            elif hp == "metrics":
                log.log("{}{}".format(hp.ljust(25, " "),
                                      set(config["train"][hp])))  # aesthetic
            elif hp != "optimizer":
                log.log("{}{}".format(hp.ljust(25, " "), config["train"][hp]))

        log.log("{}{}".format("optimizer".ljust(25, " "),
                              config["train"]["optimizer"]["name"]))
        for k, v in optimizer.state_dict()["param_groups"][0].items():
            if k != "params":
                log.log(" - {}{}".format(k.ljust(25 - 3, " "), v))

    resume = 0
    if args.checkpoint:
        chkpt = torch.load(os.path.expanduser(args.checkpoint),
                           map_location="cuda:{}".format(rank))
        assert nn.module.version == chkpt[
            "model_version"], "Model Version mismatch"
        nn.load_state_dict(chkpt["state_dict"])

        if rank == 0:
            log.log("\n--- Using Checkpoint ---")
            log.log("Path:\t\t {}".format(args.checkpoint))
            log.log("UUID:\t\t {}".format(chkpt["uuid"]))

        if args.resume:
            optimizer.load_state_dict(chkpt["optimizer"])
            resume = chkpt["epoch"]
            assert resume < args.epochs, "Epoch asked, already reached by the given checkpoint"

    loss_module = load_module("abd_model.losses.{}".format(
        config["train"]["loss"].lower()))
    criterion = getattr(loss_module, config["train"]["loss"])().cuda(rank)

    for epoch in range(resume + 1, args.epochs + 1):  # 1-N based

        if rank == 0:
            log.log("\n---\nEpoch: {}/{}\n".format(epoch, args.epochs))

        sampler.set_epoch(
            epoch)  # https://github.com/pytorch/pytorch/issues/31232
        do_epoch(rank, loader, config, args.classes_weights, log, nn,
                 criterion, epoch, optimizer)

        # Save on the last epoch and every `args.saving` epochs. A falsy
        # `args.saving` (0/None) means "last epoch only" instead of crashing
        # on the modulo.
        is_saving_epoch = epoch == args.epochs or (
            args.saving and not (epoch % args.saving))

        if rank == 0 and is_saving_epoch:
            # Built only when actually saved: gathering the model and
            # optimizer state on every epoch was wasted work.
            UUID = uuid.uuid1()
            states = {
                "uuid": UUID,
                "model_version": nn.module.version,
                "producer_name": "Neat-EO.pink",
                "producer_version": abd.__version__,
                "model_licence": "MIT",
                "domain": "pink.Neat-EO",  # reverse-DNS
                "doc_string": nn.module.doc_string,
                "shape_in": shape_in,
                "shape_out": shape_out,
                "state_dict": nn.state_dict(),
                "epoch": epoch,
                "nn": config["model"]["nn"],
                "encoder": config["model"]["encoder"],
                "optimizer": optimizer.state_dict(),
                "loader": config["model"]["loader"],
            }
            checkpoint_path = os.path.join(
                args.out, "checkpoint-{:05d}.pth".format(epoch))
            log.log("\n--- Saving Checkpoint ---")
            log.log("Path:\t\t {}".format(checkpoint_path))
            log.log("UUID:\t\t {}\n".format(UUID))
            torch.save(states, checkpoint_path)

        # Keep every rank in step while rank 0 writes the checkpoint.
        dist.barrier()

    dist.destroy_process_group()
예제 #9
0
def main(args):
    """Export a checkpoint as a ``.pth`` file, an ONNX graph or a TorchScript trace.

    Metadata (nn, encoder, loader, doc_string, shape_in, shape_out) is read
    from the checkpoint; any entry the checkpoint lacks must be supplied on
    the command line. The parent directory of ``args.out`` is created when
    needed.

    Args:
        args: parsed CLI namespace; reads ``checkpoint``, ``out``, ``type``
            ("pth", "onnx" or "jit") and the fallbacks ``nn``, ``encoder``,
            ``loader``, ``doc_string``, ``shape_in``, ``shape_out`` (shapes
            as comma separated integers). ``out`` is expanded in place.

    Raises:
        AssertionError: when the checkpoint is empty, or when a metadata
            entry is missing from both the checkpoint and the command line.
    """
    chkpt = torch.load(os.path.expanduser(args.checkpoint),
                       map_location=torch.device("cpu"))
    assert chkpt, "Unable to load checkpoint {}".format(args.checkpoint)

    args.out = os.path.expanduser(args.out)
    out_dir = os.path.dirname(args.out)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    UUID = chkpt["uuid"] if "uuid" in chkpt else uuid.uuid1()

    try:
        nn_name = chkpt["nn"]
    except KeyError:
        assert args.nn, "--nn mandatory as not already in input .pth"
        nn_name = args.nn

    try:
        encoder = chkpt["encoder"]
    except KeyError:
        assert args.encoder, "--encoder mandatory as not already in input .pth"
        encoder = args.encoder

    try:
        loader = chkpt["loader"]
    except KeyError:
        assert args.loader, "--loader mandatory as not already in input .pth"
        # Was `doc_string = args.doc_string`, which left `loader` unbound.
        loader = args.loader

    try:
        doc_string = chkpt["doc_string"]
    except KeyError:
        assert args.doc_string, "--doc_string mandatory as not already in input .pth"
        doc_string = args.doc_string

    try:
        shape_in = chkpt["shape_in"]
    except KeyError:
        assert args.shape_in, "--shape_in mandatory as not already in input .pth"
        shape_in = tuple(map(int, args.shape_in.split(",")))

    try:
        shape_out = chkpt["shape_out"]
    except KeyError:
        assert args.shape_out, "--shape_out mandatory as not already in input .pth"
        shape_out = tuple(map(int, args.shape_out.split(",")))

    nn_module = load_module("abd_model.nn.{}".format(nn_name.lower()))
    nn = getattr(nn_module, nn_name)(shape_in, shape_out,
                                     encoder.lower()).to("cpu")

    # `file=` belongs to print(), not to str.format(): these two lines used to
    # go to stdout.
    print("abd export model to {}".format(args.type), file=sys.stderr)
    print("Model: {}".format(nn_name), file=sys.stderr)
    print("UUID: {}".format(UUID), file=sys.stderr)

    if args.type == "pth":

        # NOTE(review): this branch serialises the weights of the freshly
        # built `nn`, not chkpt["state_dict"], and stores "model_version" as
        # None. Kept as is; confirm whether the checkpoint weights and
        # nn.version were intended.
        states = {
            "uuid": UUID,
            "model_version": None,
            "producer_name": "abd_model",
            "producer_version": abd.__version__,
            "model_licence": "MIT",
            "domain": "abd_model",  # reverse-DNS
            "doc_string": doc_string,
            "shape_in": shape_in,
            "shape_out": shape_out,
            "state_dict": nn.state_dict(),
            "epoch": 0,
            "nn": nn_name,
            "encoder": encoder,
            "optimizer": None,
            "loader": loader,
        }

        torch.save(states, args.out)

    else:

        # https://github.com/pytorch/pytorch/issues/9176
        # Checkpoints saved from a DistributedDataParallel wrapper prefix each
        # key with "module."; `nn` is not wrapped here, so strip the prefix.
        # load_state_dict() is what restores the weights: the former
        # state_dict(chkpt[...]) call only used the dict as a destination and
        # exported an untrained network.
        prefix = "module."
        state_dict = {
            (key[len(prefix):] if key.startswith(prefix) else key): value
            for key, value in chkpt["state_dict"].items()
        }
        nn.load_state_dict(state_dict)

        nn.eval()

        # Random single-item batch, used only to trace the network.
        batch = torch.rand(1, *shape_in)

        if args.type == "onnx":
            torch.onnx.export(
                nn,
                torch.autograd.Variable(batch),
                args.out,
                input_names=["input", "shape_in", "shape_out"],
                output_names=["output"],
                dynamic_axes={
                    "input": {
                        0: "num_batch"
                    },
                    "output": {
                        0: "num_batch"
                    }
                },
            )

        if args.type == "jit":
            torch.jit.trace(nn, batch).save(args.out)
예제 #10
0
def gpu_worker(rank, world_size, lock_file, args, config, dataset, palette,
               transparency):
    """Prediction worker: run the checkpointed network and write label tiles.

    Joins the NCCL process group through ``lock_file``, rebuilds the network
    described by the checkpoint metadata, restores its weights and predicts
    this rank's share of ``dataset``. Each prediction is rounded per class,
    folded into a single class-index mask and written with
    ``tile_label_to_file`` under ``args.out``.

    With ``args.metatiles``, the four overlapping windows of each input image
    are predicted separately and only the central part of each output is
    kept to assemble the final tile.

    Args:
        rank: index of this process, also used as CUDA device index.
        world_size: number of processes in the group.
        lock_file: path used as "file://" rendezvous for the process group.
        args: CLI namespace; reads ``checkpoint``, ``bs``, ``workers``,
            ``metatiles`` and ``out``.
        config: configuration mapping (not read here).
        dataset: dataset yielding ``(images, tiles)`` batches.
        palette, transparency: forwarded to ``tile_label_to_file``.

    Raises:
        AssertionError: on a model version mismatch or an empty loader.
    """
    dist.init_process_group(backend="nccl",
                            init_method="file://" + lock_file,
                            world_size=world_size,
                            rank=rank)
    torch.cuda.set_device(rank)

    # Loaded once, with "~" expanded (it used to be loaded twice, the first
    # time without expansion).
    chkpt = torch.load(os.path.expanduser(args.checkpoint),
                       map_location="cuda:{}".format(rank))
    nn_module = load_module("abd_model.nn.{}".format(chkpt["nn"].lower()))
    nn = getattr(nn_module, chkpt["nn"])(chkpt["shape_in"], chkpt["shape_out"],
                                         chkpt["encoder"].lower()).to(rank)
    nn = DistributedDataParallel(nn,
                                 device_ids=[rank],
                                 find_unused_parameters=True)

    assert nn.module.version == chkpt[
        "model_version"], "Model Version mismatch"
    nn.load_state_dict(chkpt["state_dict"])

    sampler = torch.utils.data.distributed.DistributedSampler(
        dataset, num_replicas=world_size, rank=rank)
    loader = DataLoader(dataset,
                        batch_size=args.bs,
                        shuffle=False,
                        num_workers=args.workers,
                        sampler=sampler)
    assert len(loader), "Empty predict dataset directory. Check your path."

    C, W, H = chkpt["shape_out"]

    nn.eval()
    with torch.no_grad():

        dataloader = tqdm(loader, desc="Predict", unit="Batch/GPU",
                          ascii=True) if rank == 0 else loader

        for images, tiles in dataloader:

            if args.metatiles:
                N = images.shape[0]
                qs = int(W / 4)  # quarter size: border cropped off each output
                hs = int(W / 2)  # half size: side of each assembled quadrant
                ts = int(W)  # tile size: side of each window fed to the network

                # fmt:off
                # np.float64 is what the removed `np.float` alias stood for.
                probs = np.zeros((N, C, W, H), dtype=np.float64)
                probs[:, :, 0:hs, 0:hs] = nn(
                    images[:, :, 0:ts,
                           0:ts].to(rank)).data.cpu().numpy()[:, :, qs:-qs,
                                                              qs:-qs]
                probs[:, :, 0:hs, hs:] = nn(
                    images[:, :, 0:ts,
                           hs:].to(rank)).data.cpu().numpy()[:, :, qs:-qs,
                                                             qs:-qs]
                probs[:, :, hs:, 0:hs] = nn(
                    images[:, :, hs:,
                           0:ts].to(rank)).data.cpu().numpy()[:, :, qs:-qs,
                                                              qs:-qs]
                probs[:, :, hs:, hs:] = nn(
                    images[:, :, hs:,
                           hs:].to(rank)).data.cpu().numpy()[:, :, qs:-qs,
                                                             qs:-qs]
                # fmt:on
            else:
                probs = nn(images.to(rank)).data.cpu().numpy()

            for tile, prob in zip(tiles, probs):
                x, y, z = list(map(int, tile))
                mask = np.zeros((W, H), dtype=np.uint8)

                # Each class contributes its index wherever its rounded
                # probability is 1.
                for c in range(C):
                    mask += np.around(prob[c, :, :]).astype(np.uint8) * c

                tile_label_to_file(args.out, mercantile.Tile(x, y, z), palette,
                                   transparency, mask)