Example #1
def make_upfn(args, dataset, model, layername):
    '''Creates an upsampling function.'''
    convs, data_shape = None, None
    if args.model == 'alexnet':
        convs = [layer for name, layer in model.model.named_children()
                if name.startswith('conv') or name.startswith('pool')]
    elif args.model == 'progan':
        # Probe the data shape
        out = model(dataset[0][0][None,...].cuda())
        data_shape = model.retained_layer(layername).shape[2:]
        upfn = upsample.upsampler(
                (64, 64),
                data_shape=data_shape,
                image_size=out.shape[2:])
        return upfn
    else:
        # Probe the data shape
        _ = model(dataset[0][0][None,...].cuda())
        data_shape = model.retained_layer(layername).shape[2:]
        pbar.print('upsampling from data_shape', tuple(data_shape))
    upfn = upsample.upsampler(
            (56, 56),
            data_shape=data_shape,
            source=dataset,
            convolutions=convs)
    return upfn
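
The returned upfn is a callable that upsamples a batch of activations to the target spatial size. As a minimal sketch of how it might be applied (the channel count and shapes below are illustrative assumptions, not from the source):

import torch
from netdissect import upsample

# Hypothetical activations: batch of 1, 512 channels, 7x7 feature map.
acts = torch.randn(1, 512, 7, 7)
# Build an upsampler directly from the data shape, as in the progan branch.
upfn = upsample.upsampler((64, 64), data_shape=(7, 7))
hacts = upfn(acts)
print(hacts.shape)  # expected: torch.Size([1, 512, 64, 64])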
Example #2
def __init__(self, size, image_size=None, data_size=None,
        renormalizer=None, scale_offset=None, level=None, actrange=None,
        source=None, convolutions=None, quantiles=None,
        percent_level=None):
    if image_size is None and source is not None:
        image_size = upsample.image_size_from_source(source)
    if renormalizer is None and source is not None:
        renormalizer = renormalize.renormalizer(source=source, mode='byte')
    if scale_offset is None and convolutions is not None:
        scale_offset = upsample.sequence_scale_offset(convolutions)
    if data_size is None and convolutions is not None:
        data_size = upsample.sequence_data_size(convolutions, image_size)
    if level is None and quantiles is not None:
        level = quantiles.quantiles([percent_level or 0.95])[:, 0]
    if actrange is None and quantiles is not None:
        actrange = quantiles.quantiles([0.01, 0.99])
    if isinstance(size, int):
        size = (size, size)
    self.size = size
    self.image_size = image_size
    self.data_size = data_size
    self.renormalizer = renormalizer
    self.scale_offset = scale_offset
    self.percent_level = percent_level
    self.level = level
    self.actrange = actrange
    self.quantiles = quantiles
    self.upsampler = None
    if self.data_size is not None:
        self.upsampler = upsample.upsampler(size, data_size,
                image_size=self.image_size,
                scale_offset=scale_offset)
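
A hedged construction sketch, assuming this is the imgviz.ImageVisualizer constructor used in the later examples: an integer size is normalized to a square tuple, and passing quantiles derives both the masking level and the display actrange automatically. Here dataset and rq are placeholder names for a source dataset and a tallied quantile object as in Examples #8 and #9:

# Illustrative only; dataset and rq are placeholder names.
iv = imgviz.ImageVisualizer(224, source=dataset,
                            percent_level=0.99, quantiles=rq)
assert iv.size == (224, 224)  # int size normalized by the constructor above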
Example #3
def upsampler_for(self, a):
    if self.upsampler is not None:
        return self.upsampler
    return upsample.upsampler(self.size, a.shape,
                image_size=self.image_size,
                scale_offset=self.scale_offset,
                dtype=a.dtype, device=a.device)
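
upsampler_for reuses the upsampler precomputed in the constructor when data_size was known up front; otherwise it builds one lazily, matched to the incoming tensor's shape, dtype, and device. A standalone sketch of the lazy path (shapes are illustrative):

import torch
from netdissect import upsample

a = torch.randn(4, 256, 7, 7)  # hypothetical retained activations
up = upsample.upsampler((56, 56), a.shape[2:],
                        dtype=a.dtype, device=a.device)
print(up(a).shape)  # expected: torch.Size([4, 256, 56, 56])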
Example #4
def make_upfn_without_hooks(args, dataset, layername, layers_output):
    convs = None
    data_HW_size = layers_output[layername].shape[2:]
    pbar.print('upsampling from data_shape', tuple(data_HW_size))
    upfn = upsample.upsampler(
        (56, 56),
        data_shape=data_HW_size,
        source=dataset,
        convolutions=convs)
    return upfn
Example #5
    def __init__(self,
                 model,
                 dataset,
                 dataset_path,
                 model_layer,
                 seglabels=None,
                 segcatlabels=None,
                 model_nm=None):
        model = nethook.InstrumentedModel(model)
        model.cuda()
        model.eval()
        self.model = model
        self.layername = model_layer
        self.model.retain_layer(self.layername)
        self.model_name = model_nm

        self.topk = None
        self.unit_images = None
        self.iou99 = None

        self.upfn = upsample.upsampler(
            target_shape=(56, 56),
            data_shape=(7, 7),
        )

        if dataset == 'nih_seg':
            if seglabels is not None:
                self.seglabels = seglabels
            else:
                self.seglabels = [
                    'No Class', 'Atelectasis', 'Cardiomegaly', 'Effusion',
                    'Infiltrate', 'Mass', 'Nodule', 'Pneumonia',
                    'Pneumothorax', 'Consolidation', 'Edema', 'Emphysema',
                    'Fibrosis', 'Pleural_Thickening', 'Hernia'
                ]
            if segcatlabels is not None:
                self.segcatlabels = segcatlabels
            else:
                self.segcatlabels = [('No Class', 'No Class'),
                                     ('Atelectasis', 'Atelectasis'),
                                     ('Cardiomegaly', 'Cardiomegaly'),
                                     ('Effusion', 'Effusion'),
                                     ('Infiltrate', 'Infiltrate'),
                                     ('Mass', 'Mass'), ('Nodule', 'Nodule'),
                                     ('Pneumonia', 'Pneumonia'),
                                     ('Pneumothorax', 'Pneumothorax'),
                                     ('Consolidation', 'Consolidation'),
                                     ('Edema', 'Edema'),
                                     ('Emphysema', 'Emphysema'),
                                     ('Fibrosis', 'Fibrosis'),
                                     ('Pleural_Thickening',
                                      'Pleural_Thickening'),
                                     ('Hernia', 'Hernia')]

            if model_nm == 'chexpert_noweights':
                batch_sz = 10
            else:
                batch_sz = 20

            config = {'batch_size': batch_sz, 'input_size': (224, 224)}

            # Creating the dataloaders
            _, _, self.ds_loader = get_nih_segmented_dataloaders(
                dataset_path, **config)
            self.ds = self.ds_loader.dataset
            # Setting sample size
            self.sample_size = 100

        self.rq = self._get_rq_vals()
        self.iv = imgviz.ImageVisualizer(224,
                                         source=self.ds,
                                         percent_level=0.99,
                                         quantiles=self.rq)
Example #6
    def __init__(self,
                 model,
                 dataset,
                 dataset_path,
                 model_layer,
                 seglabels=None,
                 segcatlabels=None):
        model = nethook.InstrumentedModel(model)
        model.cuda()
        model.eval()
        self.model = model
        self.layername = model_layer
        self.model.retain_layer(self.layername)

        self.topk = None
        self.unit_images = None
        self.iou99 = None

        self.upfn = upsample.upsampler(
            target_shape=(56, 56),
            data_shape=(7, 7),
        )

        if dataset == 'covid_seg':
            self.seglabels = [
                'No class', 'Left Lung', 'Right Lung', 'Cardiomediastinum',
                'Airways', 'Ground Glass Opacities', 'Consolidation',
                'Pleural Effusion', 'Pneumothorax', 'Endotracheal Tube',
                'Central Venous Line', 'Monitoring Probes', 'Nosogastric Tube',
                'Chest tube', 'Tubings'
            ]
            self.segcatlabels = [
                ('No class', 'No class'), ('Left Lung', 'Left Lung'),
                ('Right Lung', 'Right Lung'),
                ('Cardiomediastinum', 'Cardiomediastinum'),
                ('Airways', 'Airways'),
                ('Ground Glass Opacities', 'Ground Glass Opacities'),
                ('Consolidation', 'Consolidation'),
                ('Pleural Effusion', 'Pleural Effusion'),
                ('Pneumothorax', 'Pneumothorax'),
                ('Endotracheal Tube', 'Endotracheal Tube'),
                ('Central Venous Line', 'Central Venous Line'),
                ('Monitoring Probes', 'Monitoring Probes'),
                ('Nosogastric Tube', 'Nosogastric Tube'),
                ('Chest tube', 'Chest tube'), ('Tubings', 'Tubings')
            ]
            config = {
                'batch_size': 1,
                'input_size': (224, 224),
            }

            # Creating the dataloaders
            self.ds_loader = get_segmentation_dataloader(
                dataset_path, **config)
            self.ds = self.ds_loader.dataset
            # Sample size to draw when the dataset is large; defaults to 100
            # for the COVID segmentation set.
            self.sample_size = 100

        self.rq = self._get_rq_vals()
        self.iv = imgviz.ImageVisualizer(224,
                                         source=self.ds,
                                         percent_level=0.99,
                                         quantiles=self.rq)
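
Once constructed, the dissector's retained layer and visualizer are typically combined to render a unit's top-activating region over an input image, mirroring the calls in Example #9. A hedged usage sketch; dissector, idx, and unit are illustrative names, not from the source:

# Hypothetical usage after construction.
img = dissector.ds[idx][0]
_ = dissector.model(img[None].cuda())  # forward pass fills the retained layer
acts = dissector.model.retained_layer(dissector.layername)
masked = dissector.iv.masked_image(img, acts[0], unit)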
Example #7
for layername in layers:
    #if os.path.isfile(os.path.join(qd.dir(layername), 'intersect_99.npz')):
    #    continue
    busy_fn = os.path.join(qd.dir(layername), 'busy.txt')
    if os.path.isfile(busy_fn):
        print(busy_fn)
        continue
    with open(busy_fn, 'w') as f:
        f.write('busy')
    print('working on', layername)

    inst_net = nethook.InstrumentedModel(copy.deepcopy(net)).cuda()
    inst_net.retain_layer('features.' + layername)
    inst_net(ds[0][0][None].cuda())
    sample_act = inst_net.retained_layer('features.' + layername).cpu()
    upfn = upsample.upsampler((64, 64), sample_act.shape[2:])

    def flat_acts(batch):
        inst_net(batch.cuda())
        acts = upfn(inst_net.retained_layer('features.' + layername))
        return acts.permute(0, 2, 3, 1).contiguous().view(-1, acts.shape[1])

    s_rq = tally.tally_quantile(flat_acts,
                                sds,
                                cachefile=os.path.join(qd.dir(layername),
                                                       's_rq.npz'))
    u_rq = qd.rq(layername)

    def intersect_99_fn(uimg, simg):
        s_99 = s_rq.quantiles(0.99)[None, :, None, None].cuda()
        u_99 = u_rq.quantiles(0.99)[None, :, None, None].cuda()
Example #8
dataset = parallelfolder.ParallelImageFolders(
    ['dataset/places/val'], transform=[center_crop],
    classification=True,
    shuffle=True)

train_dataset = parallelfolder.ParallelImageFolders(
    ['dataset/places/train'], transform=[center_crop],
    classification=True,
    shuffle=True)

# Collect unconditional quantiles
from netdissect import tally

upfn = upsample.upsampler(
    (56, 56),                     # The target output shape
    (7, 7),                       # The spatial shape of the layer's data
    source=dataset,
)

renorm = renormalize.renormalizer(dataset, mode='zc')

def compute_samples(batch, *args):
    image_batch = batch.cuda()
    _ = model(image_batch)
    acts = model.retained_layer(layername)
    hacts = upfn(acts)
    return hacts.permute(0, 2, 3, 1).contiguous().view(-1, acts.shape[1])

pbar.descnext('rq')
rq = tally.tally_quantile(compute_samples, dataset, sample_size=sample_size,
        r=8192, cachefile=resfile('rq.npz'))
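
With rq tallied, per-unit activation thresholds can be read off directly; for instance, the 99th-percentile level later used for masking (see Example #9) would be obtained, continuing the example above, as:

# Continues the example: rq holds running quantiles for every unit.
level_at_99 = rq.quantiles(0.99)[None, :, None, None].cuda()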
Example #9
def main():

    # Load the arguments
    args = parse_option()

    dataset = args.dataset
    sample_size = args.sample_size
    layername = args.layer

    # Other values for places and imagenet MoCo model
    epoch = 240
    image_size = 224
    crop = 0.2
    crop_padding = 32
    batch_size = 1
    num_workers = 24
    train_sampler = None
    moco = True

    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]
    normalize = transforms.Normalize(mean=mean, std=std)

    # Set appropriate paths
    folder_path = "/data/vision/torralba/ganprojects/yyou/CMC_data/{}_models".format(
        dataset)
    model_name = "/{}_MoCo0.999_softmax_16384_resnet50_lr_0.03".format(dataset) \
                     + "_decay_0.0001_bsz_128_crop_0.2_aug_CJ"
    epoch_name = "/ckpt_epoch_{}.pth".format(epoch)
    my_path = folder_path + model_name + epoch_name

    data_path = "/data/vision/torralba/datasets/"
    web_path = "/data/vision/torralba/scratch/yyou/wednesday/dissection/"

    if dataset == "imagenet":
        data_path += "imagenet_pytorch"
        web_path += dataset + "/" + layername
    elif dataset == "places365":
        data_path += "places/places365_standard/places365standard_easyformat"
        web_path += dataset + "/" + layername

    # Create web path folder directory for this layer
    if not os.path.exists(web_path):
        os.makedirs(web_path)

    # Load validation data loader
    val_folder = data_path + "/val"
    val_transform = transforms.Compose([
        transforms.Resize(image_size + crop_padding),
        transforms.CenterCrop(image_size),
        transforms.ToTensor(),
        normalize,
    ])

    ds = QuickImageFolder(val_folder,
                          transform=val_transform,
                          shuffle=True,
                          two_crop=False)
    ds_loader = torch.utils.data.DataLoader(ds,
                                            batch_size=batch_size,
                                            shuffle=(train_sampler is None),
                                            num_workers=num_workers,
                                            pin_memory=True,
                                            sampler=train_sampler)

    # Load model from checkpoint
    checkpoint = torch.load(my_path)
    model_checkpoint = {
        key.replace(".module", ""): val
        for key, val in checkpoint['model'].items()
    }

    model = InsResNet50(parallel=False)
    model.load_state_dict(model_checkpoint)
    model = nethook.InstrumentedModel(model)
    model.cuda()

    # Renormalize RGB data from the statistical scaling in ds to the [-1, 1] range
    renorm = renormalize.renormalizer(source=ds, target='zc')

    # Retain desired layer with nethook
    batch = next(iter(ds_loader))[0]
    model.retain_layer(layername)
    model(batch.cuda())
    acts = model.retained_layer(layername).cpu()

    upfn = upsample.upsampler(
        target_shape=(56, 56),
        data_shape=(7, 7),
    )

    def flatten_activations(batch, *args):
        image_batch = batch
        _ = model(image_batch.cuda())
        acts = model.retained_layer(layername).cpu()
        hacts = upfn(acts)
        return hacts.permute(0, 2, 3, 1).contiguous().view(-1, acts.shape[1])

    def tally_quantile_for_layer(layername):
        rq = tally.tally_quantile(
            flatten_activations,
            dataset=ds,
            sample_size=sample_size,
            batch_size=100,
            cachefile='results/{}/{}_rq_cache.npz'.format(dataset, layername))
        return rq

    rq = tally_quantile_for_layer(layername)

    # Visualize range of activations (statistics of each filter over the sample images)
    fig, axs = plt.subplots(2, 2, figsize=(10, 8))
    axs = axs.flatten()
    quantiles = [0.5, 0.8, 0.9, 0.99]
    for i in range(4):
        axs[i].plot(rq.quantiles(quantiles[i]))
        axs[i].set_title("Rq quantiles ({})".format(quantiles[i]))
    fig.suptitle("{}  -  sample size of {}".format(dataset, sample_size))
    plt.savefig(web_path + "/rq_quantiles")

    # Set the image visualizer with the rq and percent level
    iv = imgviz.ImageVisualizer(224,
                                source=ds,
                                percent_level=0.95,
                                quantiles=rq)

    # Tally top k images that maximize the mean activation of the filter
    def max_activations(batch, *args):
        image_batch = batch.cuda()
        _ = model(image_batch)
        acts = model.retained_layer(layername)
        return acts.view(acts.shape[:2] + (-1, )).max(2)[0]

    def mean_activations(batch, *args):
        image_batch = batch.cuda()
        _ = model(image_batch)
        acts = model.retained_layer(layername)
        return acts.view(acts.shape[:2] + (-1, )).mean(2)

    topk = tally.tally_topk(
        mean_activations,
        dataset=ds,
        sample_size=sample_size,
        batch_size=100,
        cachefile='results/{}/{}_cache_mean_topk.npz'.format(
            dataset, layername))

    top_indexes = topk.result()[1]

    # Visualize top-activating images for a particular unit
    if not os.path.exists(web_path + "/top_activating_imgs"):
        os.makedirs(web_path + "/top_activating_imgs")

    def top_activating_imgs(unit):
        img_ids = [i for i in top_indexes[unit, :12]]
        images, preds = [], []
        for i in img_ids:
            # Run the model first so the retained layer holds this image's
            # activations before the mask is drawn.
            logits = model(ds[i][0][None].cuda())
            images.append(iv.masked_image(
                ds[i][0], model.retained_layer(layername)[0], unit))
            preds.append(ds.classes[logits.max(1)[1].item()])

        fig, axs = plt.subplots(3, 4, figsize=(16, 12))
        axs = axs.flatten()

        for i in range(12):
            axs[i].imshow(images[i])
            axs[i].tick_params(axis='both', which='both', bottom=False, \
                               left=False, labelbottom=False, labelleft=False)
            axs[i].set_title("img {} \n pred: {}".format(img_ids[i], preds[i]))
        fig.suptitle("unit {}".format(unit))

        plt.savefig(web_path + "/top_activating_imgs/unit_{}".format(unit))

    for unit in np.random.randint(len(top_indexes), size=10):
        top_activating_imgs(unit)

    def compute_activations(image_batch):
        image_batch = image_batch.cuda()
        _ = model(image_batch)
        acts_batch = model.retained_layer(layername)
        return acts_batch

    unit_images = iv.masked_images_for_topk(
        compute_activations,
        ds,
        topk,
        k=5,
        num_workers=10,
        pin_memory=True,
        cachefile='results/{}/{}_cache_top10images.npz'.format(
            dataset, layername))

    with open("results/{}/{}_unit_images.pkl".format(dataset, layername),
              'wb') as f:
        pickle.dump(unit_images, f)

    # Load a segmentation model
    segmodel, seglabels, segcatlabels = setting.load_segmenter('netpqc')

    # Intersections between every unit's 99th activation
    # and every segmentation class identified
    level_at_99 = rq.quantiles(0.99).cuda()[None, :, None, None]

    def compute_selected_segments(batch, *args):
        image_batch = batch.cuda()
        seg = segmodel.segment_batch(renorm(image_batch), downsample=4)
        _ = model(image_batch)
        acts = model.retained_layer(layername)
        hacts = upfn(acts)
        # Indicator: 1.0 where activation exceeds its 99th-percentile level.
        iacts = (hacts > level_at_99).float()
        return tally.conditional_samples(iacts, seg)

    condi99 = tally.tally_conditional_mean(
        compute_selected_segments,
        dataset=ds,
        sample_size=sample_size,
        cachefile='results/{}/{}_cache_condi99.npz'.format(dataset, layername))

    iou99 = tally.iou_from_conditional_indicator_mean(condi99)
    with open("results/{}/{}_iou99.pkl".format(dataset, layername), 'wb') as f:
        pickle.dump(iou99, f)

    # Show units with best match to a segmentation class
    iou_unit_label_99 = sorted(
        [(unit, concept.item(), seglabels[concept], bestiou.item())
         for unit, (bestiou, concept) in enumerate(zip(*iou99.max(0)))],
        key=lambda x: -x[-1])

    fig, axs = plt.subplots(20, 1, figsize=(20, 80))
    axs = axs.flatten()

    for i, (unit, concept, label, score) in enumerate(iou_unit_label_99[:20]):
        axs[i].imshow(unit_images[unit])
        axs[i].set_title('unit %d; iou %g; label "%s"' % (unit, score, label))
        axs[i].set_xticks([])
        axs[i].set_yticks([])
    plt.savefig(web_path + "/best_unit_segmentation")