Example #1
def load_model(args):
    '''Loads one of the benchmark classifiers or generators.'''
    if args.model in ['alexnet', 'vgg16', 'resnet152', 'resnet18']:
        model = setting.load_classifier(args.model)
    elif args.model == 'progan':
        model = setting.load_proggan(args.dataset)
    model = nethook.InstrumentedModel(model).cuda().eval()
    return model
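Every example on this page follows the same InstrumentedModel pattern: wrap a network, retain a layer, run a forward pass, then read the captured activations back. A minimal self-contained sketch of that pattern follows (the torchvision AlexNet and the 'features.10' layer name are illustrative assumptions, not taken from Example #1; a CUDA device is assumed, as in the examples):

import torch
import torchvision
from netdissect import nethook

# InstrumentedModel wraps an ordinary nn.Module, so .cuda()/.eval() chain as usual.
net = torchvision.models.alexnet()
model = nethook.InstrumentedModel(net).cuda().eval()

# Ask the hook to keep one layer's output during forward passes.
model.retain_layer('features.10')
with torch.no_grad():
    _ = model(torch.randn(1, 3, 224, 224).cuda())  # dummy image batch

# Read back the captured (N, C, H, W) activation tensor.
acts = model.retained_layer('features.10')
print(acts.shape)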
Example #2
def load_model(args):
    '''Loads one of the benchmark classifiers or generators.'''

    if args.model in ['alexnet', 'vgg16', 'resnet152']:
        if (args.dataset == 'ucf101'):
            model = setting.load_ucf101_classifier(args.model)
        else:
            model = setting.load_classifier(args.model)
    elif args.model == 'progan':
        model = setting.load_proggan(args.dataset)

    #Original model, use the if-else block below
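    # use_cuda is assumed to be defined elsewhere in the original project,
    # e.g. use_cuda = torch.cuda.is_available() would be a reasonable choice.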
    if (use_cuda):
        model = nethook.InstrumentedModel(model).cuda().eval()
    else:
        model = nethook.InstrumentedModel(model).eval()

    return model
Example #3
def get_moco_model(dataset, epoch=240):

    folder_path = "CMC/CMC_data/{}_models".format(dataset)
    model_name = "/{}_MoCo0.999_softmax_16384_resnet50".format(dataset) + \
                 "_lr_0.03_decay_0.0001_bsz_128_crop_0.2_aug_CJ"
    epoch_name = "/ckpt_epoch_{}.pth".format(epoch)
    my_path = folder_path + model_name + epoch_name

    checkpoint = torch.load(my_path)
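    # Strip the '.module' that DataParallel inserts into parameter names so the
    # checkpoint keys match the non-parallel (parallel=False) InsResNet50 below.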
    model_checkpoint = {
        key.replace(".module", ""): val
        for key, val in checkpoint['model'].items()
    }

    model = InsResNet50(parallel=False)
    model.load_state_dict(model_checkpoint)
    model = nethook.InstrumentedModel(model)
    return model
Example #4
    def __init__(self,
                 model,
                 dataset,
                 dataset_path,
                 model_layer,
                 seglabels=None,
                 segcatlabels=None,
                 model_nm=None):
        model = nethook.InstrumentedModel(model)
        model.cuda()
        model.eval()
        self.model = model
        self.layername = model_layer
        self.model.retain_layer(self.layername)
        self.model_name = model_nm

        self.topk = None
        self.unit_images = None
        self.iou99 = None

        self.upfn = upsample.upsampler(
            target_shape=(56, 56),
            data_shape=(7, 7),
        )

        if dataset == 'nih_seg':
            if seglabels is not None:
                self.seglabels = seglabels
            else:
                self.seglabels = [
                    'No Class', 'Atelectasis', 'Cardiomegaly', 'Effusion',
                    'Infiltrate', 'Mass', 'Nodule', 'Pneumonia',
                    'Pneumothorax', 'Consolidation', 'Edema', 'Emphysema',
                    'Fibrosis', 'Pleural_Thickening', 'Hernia'
                ]
            if segcatlabels is not None:
                self.segcatlabels = segcatlabels
            else:
                self.segcatlabels = [('No Class', 'No Class'),
                                     ('Atelectasis', 'Atelectasis'),
                                     ('Cardiomegaly', 'Cardiomegaly'),
                                     ('Effusion', 'Effusion'),
                                     ('Infiltrate', 'Infiltrate'),
                                     ('Mass', 'Mass'), ('Nodule', 'Nodule'),
                                     ('Pneumonia', 'Pneumonia'),
                                     ('Pneumothorax', 'Pneumothorax'),
                                     ('Consolidation', 'Consolidation'),
                                     ('Edema', 'Edema'),
                                     ('Emphysema', 'Emphysema'),
                                     ('Fibrosis', 'Fibrosis'),
                                     ('Pleural_Thickening',
                                      'Pleural_Thickening'),
                                     ('Hernia', 'Hernia')]

            if model_nm == 'chexpert_noweights':
                batch_sz = 10
            else:
                batch_sz = 20

            config = {'batch_size': batch_sz, 'input_size': (224, 224)}

            # Creating the dataloaders
            _, _, self.ds_loader = get_nih_segmented_dataloaders(
                dataset_path, **config)
            self.ds = self.ds_loader.dataset
            # Setting sample size
            self.sample_size = 100

        self.rq = self._get_rq_vals()
        self.iv = imgviz.ImageVisualizer(224,
                                         source=self.ds,
                                         percent_level=0.99,
                                         quantiles=self.rq)
Example #5
    def __init__(self,
                 model,
                 dataset,
                 dataset_path,
                 model_layer,
                 seglabels=None,
                 segcatlabels=None):
        model = nethook.InstrumentedModel(model)
        model.cuda()
        model.eval()
        self.model = model
        self.layername = model_layer
        self.model.retain_layer(self.layername)

        self.topk = None
        self.unit_images = None
        self.iou99 = None

        self.upfn = upsample.upsampler(
            target_shape=(56, 56),
            data_shape=(7, 7),
        )

        if dataset == 'covid_seg':
            self.seglabels = [
                'No class', 'Left Lung', 'Right Lung', 'Cardiomediastinum',
                'Airways', 'Ground Glass Opacities', 'Consolidation',
                'Pleural Effusion', 'Pneumothorax', 'Endotracheal Tube',
                'Central Venous Line', 'Monitoring Probes', 'Nosogastric Tube',
                'Chest tube', 'Tubings'
            ]
            self.segcatlabels = [
                ('No class', 'No class'), ('Left Lung', 'Left Lung'),
                ('Right Lung', 'Right Lung'),
                ('Cardiomediastinum', 'Cardiomediastinum'),
                ('Airways', 'Airways'),
                ('Ground Glass Opacities', 'Ground Glass Opacities'),
                ('Consolidation', 'Consolidation'),
                ('Pleural Effusion', 'Pleural Effusion'),
                ('Pneumothorax', 'Pneumothorax'),
                ('Endotracheal Tube', 'Endotracheal Tube'),
                ('Central Venous Line', 'Central Venous Line'),
                ('Monitoring Probes', 'Monitoring Probes'),
                ('Nosogastric Tube', 'Nosogastric Tube'),
                ('Chest tube', 'Chest tube'), ('Tubings', 'Tubings')
            ]
            config = {
                'batch_size': 1,
                'input_size': (224, 224),
            }

            # Creating the dataloaders
            self.ds_loader = get_segmentation_dataloader(
                dataset_path, **config)
            self.ds = self.ds_loader.dataset
            # Specify the sample size for larger datasets. Default is 100 for covid seg.
            self.sample_size = 100

        self.rq = self._get_rq_vals()
        self.iv = imgviz.ImageVisualizer(224,
                                         source=self.ds,
                                         percent_level=0.99,
                                         quantiles=self.rq)
Example #6
#    result = PIL.Image.open(os.path.join(qd.dir(layername), 's_imgs/unit_%d.png' % unit))
#    result.load()
#    return result

for layername in layers:
    #if os.path.isfile(os.path.join(qd.dir(layername), 'intersect_99.npz')):
    #    continue
    busy_fn = os.path.join(qd.dir(layername), 'busy.txt')
    if os.path.isfile(busy_fn):
        print(busy_fn)
        continue
    with open(busy_fn, 'w') as f:
        f.write('busy')
    print('working on', layername)

    inst_net = nethook.InstrumentedModel(copy.deepcopy(net)).cuda()
    inst_net.retain_layer('features.' + layername)
    inst_net(ds[0][0][None].cuda())
    sample_act = inst_net.retained_layer('features.' + layername).cpu()
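    # Build an upsampler that maps this layer's (H, W) activation grid up to
    # 64x64 before the per-pixel quantile tally below.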
    upfn = upsample.upsampler((64, 64), sample_act.shape[2:])

    def flat_acts(batch):
        inst_net(batch.cuda())
        acts = upfn(inst_net.retained_layer('features.' + layername))
        return acts.permute(0, 2, 3, 1).contiguous().view(-1, acts.shape[1])

    s_rq = tally.tally_quantile(flat_acts,
                                sds,
                                cachefile=os.path.join(qd.dir(layername),
                                                       's_rq.npz'))
    u_rq = qd.rq(layername)
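The running-quantile object returned by tally.tally_quantile is what the later examples threshold against. A short sketch of querying it (s_rq is the object computed just above; the 0.99 level mirrors the level_at_99 computations in the later examples):

# Sketch only: per-unit activation thresholds from the quantile statistics above.
level_at_99 = s_rq.quantiles(0.99)   # one 99th-percentile activation value per unit
print(level_at_99.shape)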
Example #7
    return os.path.join(resdir, filename)

# Download and instantiate the model.
model = oldresnet152.OldResNet152()
url = ('http://gandissect.csail.mit.edu/' +
       'models/resnet152_places365-f928166e5c.pth')
try:
    sd = torch.hub.load_state_dict_from_url(url) # pytorch 1.1
except:
    sd = torch.hub.model_zoo.load_url(url) # pytorch 1.0
model.load_state_dict(sd)

layername = '7'
sample_size = 36500

model = nethook.InstrumentedModel(model)
model = model.cuda()
model.retain_layer(layername)

# Load labels
from urllib.request import urlopen

synset_url = 'http://gandissect.csail.mit.edu/models/categories_places365.txt'
classlabels = [r.split(' ')[0][3:]
    for r in urlopen(synset_url).read().decode('utf-8').split('\n')]

# Load segmenter
from netdissect import segmenter
segmodel = segmenter.UnifiedParsingSegmenter(segsizes=[256])
seglabels = [l for l, c in segmodel.get_label_and_category_names()[0]]
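The segmenter and seglabels loaded here are consumed the same way in the later examples. A hedged sketch of segmenting one batch with it (image_batch is a hypothetical normalized (N, 3, H, W) tensor; downsample=4 follows the later examples):

# Sketch only: per-pixel segmentation class indices for a batch of images.
seg = segmodel.segment_batch(image_batch, downsample=4)
# seg contains integer class indices; seglabels[i] is the name of class i.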
Example #8
def main():
    args = parseargs()
    os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu)
    # layername = "encoder.layer4.0.conv2"
    layername = args.layername
    # dataset_split = "v32.1"
    resdir = '/scratch/users/abhishekm/vwm-dissect/results/%s-%s-%s-%s-%s-finetuned' % (
        args.model, args.dataset, args.seg, layername, args.data_split)
    if args.layer is not None:
        resdir += '-' + args.layer
    if args.quantile != 0.005:
        resdir += ('-%g' % (args.quantile * 1000))
    if args.thumbsize != 100:
        resdir += ('-t%d' % (args.thumbsize))
    resfile = pidfile.exclusive_dirfn(resdir)

    # model = load_model(args)
    cfg = cfg_parser(
        "/home/users/abhishekm/vwm/cfg/experiments/abm_dissect.json")
    _trainer = trainer.factory.create(cfg["model_cfg"].trainer_key, **cfg)
    model = _trainer.model
    model = nethook.InstrumentedModel(model).cuda().eval()
    # dataloader = _trainer.data_loaders["val"]["real_baseline"]
    # layername = "encoder.layer4.1.conv2" # instrumented_layername(args)
    model.retain_layer(layername)
    dataset = _trainer.datasets['val'][0][
        0]  # load_dataset(args, model=model.model)
    upfn = make_upfn(args, dataset, model, layername)
    sample_size = None  # len(dataset)
    is_generator = (args.model == 'progan')
    percent_level = 1.0 - args.quantile
    iou_threshold = args.miniou
    image_row_width = 5
    torch.set_grad_enabled(False)

    # Tally rq.np (representation quantile, unconditional).
    pbar.descnext('rq')

    def compute_samples(batch, *args):
        data_batch = batch.cuda()
        _ = model(data_batch)
        acts = model.retained_layer(layername)
        hacts = upfn(acts)
        return hacts.permute(0, 2, 3, 1).contiguous().view(-1, acts.shape[1])

    rq = tally.tally_quantile(compute_samples,
                              dataset,
                              sample_size=sample_size,
                              r=8192,
                              num_workers=10,
                              pin_memory=True,
                              cachefile=resfile('rq.npz'))

    # Create visualizations - first we need to know the topk
    pbar.descnext('topk')

    def compute_image_max(batch, *args):
        data_batch = batch.cuda()
        _ = model(data_batch)
        acts = model.retained_layer(layername)
        acts = acts.view(acts.shape[0], acts.shape[1], -1)
        acts = acts.max(2)[0]
        return acts

    topk = tally.tally_topk(compute_image_max,
                            dataset,
                            sample_size=sample_size,
                            batch_size=50,
                            num_workers=30,
                            pin_memory=True,
                            cachefile=resfile('topk.npz'))

    # Visualize top-activating patches of top-activating images.
    pbar.descnext('unit_images')
    image_size, image_source = (224, 224), None
    if is_generator:
        image_size = model(dataset[0][0].cuda()[None, ...]).shape[2:]
    else:
        image_source = dataset
    iv = imgviz.ImageVisualizer((args.thumbsize, args.thumbsize),
                                image_size=image_size,
                                source=dataset,
                                quantiles=rq,
                                level=rq.quantiles(percent_level))

    def compute_acts(data_batch, *ignored_class):
        data_batch = data_batch.cuda()
        out_batch = model(data_batch)
        acts_batch = model.retained_layer(layername)
        if is_generator:
            return (acts_batch, out_batch)
        else:
            return (acts_batch, data_batch)

    unit_images = iv.masked_images_for_topk(
        compute_acts,
        dataset,
        topk,
        k=image_row_width,
        num_workers=30,
        pin_memory=True,
        cachefile=resfile('top%dimages.npz' % image_row_width))
    pbar.descnext('saving images')
    imgsave.save_image_set(unit_images,
                           resfile('image/unit%d.jpg'),
                           sourcefile=resfile('top%dimages.npz' %
                                              image_row_width))

    # Compute IoU agreement between segmentation labels and every unit
    # Grab the 99th percentile, and tally conditional means at that level.
    level_at_99 = rq.quantiles(percent_level).cuda()[None, :, None, None]

    segmodel, seglabels, segcatlabels = setting.load_segmenter(args.seg)
    renorm = renormalize.renormalizer(dataset, target='zc')

    def compute_conditional_indicator(batch, *args):
        data_batch = batch.cuda()
        out_batch = model(data_batch)
        image_batch = out_batch if is_generator else renorm(data_batch)
        seg = segmodel.segment_batch(image_batch, downsample=4)
        acts = model.retained_layer(layername)
        hacts = upfn(acts)
        iacts = (hacts > level_at_99).float()  # indicator
        return tally.conditional_samples(iacts, seg)

    pbar.descnext('condi99')
    condi99 = tally.tally_conditional_mean(compute_conditional_indicator,
                                           dataset,
                                           sample_size=sample_size,
                                           num_workers=3,
                                           pin_memory=True,
                                           cachefile=resfile('condi99.npz'))

    # Now summarize the iou stats and graph the units
    iou_99 = tally.iou_from_conditional_indicator_mean(condi99)
    unit_label_99 = [(concept.item(), seglabels[concept],
                      segcatlabels[concept], bestiou.item())
                     for (bestiou, concept) in zip(*iou_99.max(0))]
    labelcat_list = [
        labelcat for concept, label, labelcat, iou in unit_label_99
        if iou > iou_threshold
    ]
    save_conceptcat_graph(resfile('concepts_99.svg'), labelcat_list)
    dump_json_file(
        resfile('report.json'),
        dict(header=dict(name='%s %s %s' %
                         (args.model, args.dataset, args.seg),
                         image='concepts_99.svg'),
             units=[
                 dict(image='image/unit%d.jpg' % u,
                      unit=u,
                      iou=iou,
                      label=label,
                      cat=labelcat[1]) for u, (concept, label, labelcat,
                                               iou) in enumerate(unit_label_99)
             ]))
    copy_static_file('report.html', resfile('+report.html'))
    resfile.done()
Example #9
def main():

    # Load the arguments
    args = parse_option()

    dataset = args.dataset
    sample_size = args.sample_size
    layername = args.layer

    # Other values for places and imagenet MoCo model
    epoch = 240
    image_size = 224
    crop = 0.2
    crop_padding = 32
    batch_size = 1
    num_workers = 24
    train_sampler = None
    moco = True

    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]
    normalize = transforms.Normalize(mean=mean, std=std)

    # Set appropriate paths
    folder_path = "/data/vision/torralba/ganprojects/yyou/CMC_data/{}_models".format(
        dataset)
    model_name = "/{}_MoCo0.999_softmax_16384_resnet50_lr_0.03".format(dataset) \
                     + "_decay_0.0001_bsz_128_crop_0.2_aug_CJ"
    epoch_name = "/ckpt_epoch_{}.pth".format(epoch)
    my_path = folder_path + model_name + epoch_name

    data_path = "/data/vision/torralba/datasets/"
    web_path = "/data/vision/torralba/scratch/yyou/wednesday/dissection/"

    if dataset == "imagenet":
        data_path += "imagenet_pytorch"
        web_path += dataset + "/" + layername
    elif dataset == "places365":
        data_path += "places/places365_standard/places365standard_easyformat"
        web_path += dataset + "/" + layername

    # Create the web path directory for this layer
    if not os.path.exists(web_path):
        os.makedirs(web_path)

    # Load validation data loader
    val_folder = data_path + "/val"
    val_transform = transforms.Compose([
        transforms.Resize(image_size + crop_padding),
        transforms.CenterCrop(image_size),
        transforms.ToTensor(),
        normalize,
    ])

    ds = QuickImageFolder(val_folder,
                          transform=val_transform,
                          shuffle=True,
                          two_crop=False)
    ds_loader = torch.utils.data.DataLoader(ds,
                                            batch_size=batch_size,
                                            shuffle=(train_sampler is None),
                                            num_workers=num_workers,
                                            pin_memory=True,
                                            sampler=train_sampler)

    # Load model from checkpoint
    checkpoint = torch.load(my_path)
    model_checkpoint = {
        key.replace(".module", ""): val
        for key, val in checkpoint['model'].items()
    }

    model = InsResNet50(parallel=False)
    model.load_state_dict(model_checkpoint)
    model = nethook.InstrumentedModel(model)
    model.cuda()

    # Renormalize RGB data from the statistical scaling in ds to the [-1, 1] range
    renorm = renormalize.renormalizer(source=ds, target='zc')

    # Retain desired layer with nethook
    batch = next(iter(ds_loader))[0]
    model.retain_layer(layername)
    model(batch.cuda())
    acts = model.retained_layer(layername).cpu()

    upfn = upsample.upsampler(
        target_shape=(56, 56),
        data_shape=(7, 7),
    )

    def flatten_activations(batch, *args):
        image_batch = batch
        _ = model(image_batch.cuda())
        acts = model.retained_layer(layername).cpu()
        hacts = upfn(acts)
        return hacts.permute(0, 2, 3, 1).contiguous().view(-1, acts.shape[1])

    def tally_quantile_for_layer(layername):
        rq = tally.tally_quantile(
            flatten_activations,
            dataset=ds,
            sample_size=sample_size,
            batch_size=100,
            cachefile='results/{}/{}_rq_cache.npz'.format(dataset, layername))
        return rq

    rq = tally_quantile_for_layer(layername)

    # Visualize range of activations (statistics of each filter over the sample images)
    fig, axs = plt.subplots(2, 2, figsize=(10, 8))
    axs = axs.flatten()
    quantiles = [0.5, 0.8, 0.9, 0.99]
    for i in range(4):
        axs[i].plot(rq.quantiles(quantiles[i]))
        axs[i].set_title("Rq quantiles ({})".format(quantiles[i]))
    fig.suptitle("{}  -  sample size of {}".format(dataset, sample_size))
    plt.savefig(web_path + "/rq_quantiles")

    # Set the image visualizer with the rq and percent level
    iv = imgviz.ImageVisualizer(224,
                                source=ds,
                                percent_level=0.95,
                                quantiles=rq)

    # Tally top k images that maximize the mean activation of the filter
    def max_activations(batch, *args):
        image_batch = batch.cuda()
        _ = model(image_batch)
        acts = model.retained_layer(layername)
        return acts.view(acts.shape[:2] + (-1, )).max(2)[0]

    def mean_activations(batch, *args):
        image_batch = batch.cuda()
        _ = model(image_batch)
        acts = model.retained_layer(layername)
        return acts.view(acts.shape[:2] + (-1, )).mean(2)

    topk = tally.tally_topk(
        mean_activations,
        dataset=ds,
        sample_size=sample_size,
        batch_size=100,
        cachefile='results/{}/{}_cache_mean_topk.npz'.format(
            dataset, layername))

    top_indexes = topk.result()[1]

    # Visualize top-activating images for a particular unit
    if not os.path.exists(web_path + "/top_activating_imgs"):
        os.makedirs(web_path + "/top_activating_imgs")

    def top_activating_imgs(unit):
        img_ids = [i for i in top_indexes[unit, :12]]
        images = [iv.masked_image(ds[i][0], \
                      model.retained_layer(layername)[0], unit) \
                      for i in img_ids]
        preds = [ds.classes[model(ds[i][0][None].cuda()).max(1)[1].item()]\
                    for i in img_ids]

        fig, axs = plt.subplots(3, 4, figsize=(16, 12))
        axs = axs.flatten()

        for i in range(12):
            axs[i].imshow(images[i])
            axs[i].tick_params(axis='both', which='both', bottom=False, \
                               left=False, labelbottom=False, labelleft=False)
            axs[i].set_title("img {} \n pred: {}".format(img_ids[i], preds[i]))
        fig.suptitle("unit {}".format(unit))

        plt.savefig(web_path + "/top_activating_imgs/unit_{}".format(unit))

    for unit in np.random.randint(len(top_indexes), size=10):
        top_activating_imgs(unit)

    def compute_activations(image_batch):
        image_batch = image_batch.cuda()
        _ = model(image_batch)
        acts_batch = model.retained_layer(layername)
        return acts_batch

    unit_images = iv.masked_images_for_topk(
        compute_activations,
        ds,
        topk,
        k=5,
        num_workers=10,
        pin_memory=True,
        cachefile='results/{}/{}_cache_top10images.npz'.format(
            dataset, layername))

    file = open("results/{}/unit_images.pkl".format(dataset, layername), 'wb')
    pickle.dump(unit_images, file)

    # Load a segmentation model
    segmodel, seglabels, segcatlabels = setting.load_segmenter('netpqc')

    # Intersections between every unit's 99th activation
    # and every segmentation class identified
    level_at_99 = rq.quantiles(0.99).cuda()[None, :, None, None]

    def compute_selected_segments(batch, *args):
        image_batch = batch.cuda()
        seg = segmodel.segment_batch(renorm(image_batch), downsample=4)
        _ = model(image_batch)
        acts = model.retained_layer(layername)
        hacts = upfn(acts)
        iacts = (hacts >
                 level_at_99).float()  # indicator where > 0.99 percentile.
        return tally.conditional_samples(iacts, seg)

    condi99 = tally.tally_conditional_mean(
        compute_selected_segments,
        dataset=ds,
        sample_size=sample_size,
        cachefile='results/{}/{}_cache_condi99.npz'.format(dataset, layername))

    iou99 = tally.iou_from_conditional_indicator_mean(condi99)
    file = open("results/{}/{}_iou99.pkl".format(dataset, layername), 'wb')
    pickle.dump(iou99, file)

    # Show units with best match to a segmentation class
    iou_unit_label_99 = sorted(
        [(unit, concept.item(), seglabels[concept], bestiou.item())
         for unit, (bestiou, concept) in enumerate(zip(*iou99.max(0)))],
        key=lambda x: -x[-1])

    fig, axs = plt.subplots(20, 1, figsize=(20, 80))
    axs = axs.flatten()

    for i, (unit, concept, label, score) in enumerate(iou_unit_label_99[:20]):
        axs[i].imshow(unit_images[unit])
        axs[i].set_title('unit %d; iou %g; label "%s"' % (unit, score, label))
        axs[i].set_xticks([])
        axs[i].set_yticks([])
    plt.savefig(web_path + "/best_unit_segmentation")
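The iou99 tensor computed in this example scores every (segmentation concept, unit) pair. A short sketch of reading off each unit's best-matching concept, restating the max(0)/seglabels indexing used above:

# Sketch only: per-unit best IoU and the concept that achieves it.
best_iou, best_concept = iou99.max(0)
for unit in range(5):   # inspect an arbitrary handful of units
    print(unit, seglabels[int(best_concept[unit])], round(best_iou[unit].item(), 3))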
Example #10
def main():
    CHOSEN_UNITS_DIR = os.path.join("/", "home", "dwijaya", "dissect",
                                    "experiment", "ucf101", "datas",
                                    "chosen_units.csv")
    report_dir = os.path.join(
        "/", "home", "dwijaya", "dissect", "experiment",
        "results/vgg16-ucf101-netpqc-conv5_3-10/report.json")
    result_test = load_json(report_dir)['units']
    # getUnitLabel(result_test)
    # groupUnitByLabel(result_test)
    chosen_units_df = pd.read_csv(CHOSEN_UNITS_DIR)
    args = parseargs()
    model = setting.load_ucf101_classifier(args.model)
    model = nethook.InstrumentedModel(model).cuda().eval()
    layername = args.layer
    model.retain_layer(layername)
    dataset = setting.load_ucf101_dataset(crop_size=224,
                                          in_dataloader=False,
                                          is_all_frames=True)
    train_dataset = setting.load_ucf101_dataset(crop_size=224,
                                                in_dataloader=False,
                                                is_all_frames=True,
                                                is_train=True)

    num_units = len(chosen_units_df)
    classlabels = dataset.classes

    def zeroingTopK(k=14):
        directory = os.path.join(os.getcwd(), 'results/shared',
                                 'pra-vgg16-ucf101/per_class')
        save_dir = os.path.join(directory, 'topK_target.csv')
        if (os.path.exists(save_dir)):
            df = pd.read_csv(save_dir)
            print(df)
        else:
            # topK_all_class = []
            # for idx, cl in enumerate(classlabels):
            #     cachefile = sharedfile('pra-%s-%s/%s/%s.npz' % (args.model, args.dataset, args.experiments, cl))
            #     df = pd.read_csv(save_dir)
            #     df2 = pd.read_csv(os.path.join(directory, '%s.csv' % cl))
            #     to_save = []
            #     for idx, (unit, concept) in enumerate(zip(df2['Unit'].loc[14:], df2['Concept'].loc[14:])):
            #         to_save.append((unit, concept))
            #     topK_all_class.append(to_save)
            # df = df.rename(columns={'Unnamed: 0': 'Class', '0': 'Acc_dropped'})
            # df['Unit/Concept'] = topK_all_class
            # df['Class'] = classlabels
            # df.to_csv(save_dir)
            topK_all_class = []
            acc_per_class_list, target_acc_class = [], []
            for idx, cl in enumerate(classlabels):
                cachefile = sharedfile(
                    'pra-%s-%s/%s/%s.npz' %
                    (args.model, args.dataset, args.experiments, cl))
                df = pd.read_csv(os.path.join(directory, '%s.csv' % cl))
                units_to_remove = df['Unit'].loc[:k - 1].to_list()
                accuracy, acc_per_class = my_test_perclass(
                    model,
                    dataset,
                    layername=layername,
                    ablated_units=units_to_remove,
                    cachefile=cachefile)
                target_acc_class.append(acc_per_class[idx])
                acc_per_class_list.append(acc_per_class)
                to_save = []
                for idx, (unit, concept) in enumerate(
                        zip(df['Unit'].loc[:k - 1],
                            df['Concept'].loc[:k - 1])):
                    to_save.append((unit, concept))
                topK_all_class.append(to_save)

            result_df = pd.DataFrame(target_acc_class, columns=['Acc_dropped'])
            # result_df = result_df.rename(columns={'Unnamed: 0': 'Class', '0': 'Acc_dropped'})
            result_df['Unit/Concept'] = topK_all_class
            result_df['Class'] = classlabels

            result_df.to_csv(os.path.join(directory, "topK_target.csv"))
            pd.DataFrame(acc_per_class_list).to_csv(
                os.path.join('topK_per_class.csv'))

    def zeroingBottomK(k=498):  # previously 498, so that value was wrong.
        directory = os.path.join(os.getcwd(), 'results/shared',
                                 'pra-vgg16-ucf101/per_class')
        topK_all_class = []
        acc_per_class_list, target_acc_class = [], []
        for idx, cl in enumerate(classlabels):
            cachefile = sharedfile(
                'pra-%s-%s/%s/%s.npz' %
                (args.model, args.dataset, args.experiments, cl))
            df = pd.read_csv(os.path.join(directory, '%s.csv' % cl))
            units_to_remove = df['Unit'].loc[k:].to_list()
            accuracy, acc_per_class = my_test_perclass(
                model,
                dataset,
                layername=layername,
                ablated_units=units_to_remove,
                cachefile=cachefile)
            target_acc_class.append(acc_per_class[idx])
            acc_per_class_list.append(acc_per_class)
            to_save = []
            for idx, (unit, concept) in enumerate(
                    zip(df['Unit'].loc[k:], df['Concept'].loc[k:])):
                to_save.append((unit, concept))
            topK_all_class.append(to_save)

        result_df = pd.DataFrame(target_acc_class, columns=['Acc_dropped'])
        result_df['Unit/Concept'] = topK_all_class
        result_df['Class'] = classlabels

        result_df.to_csv(os.path.join(directory, "bottomK_target_new.csv"))
        # pd.DataFrame(target_acc_class).to_csv(os.path.join(directory, "bottomK_target_new.csv"))
        pd.DataFrame(acc_per_class_list).to_csv(
            os.path.join(directory, 'bottomK_per_class_new.csv'))

    def zeroKWithConcepts():
        directory = os.path.join(os.getcwd(), 'results/shared',
                                 'pra-vgg16-ucf101/per_class')
        save_dir = os.path.join(directory, 'bottomK_target.csv')
        topK_all_class = []
        for idx, cl in enumerate(classlabels):
            cachefile = sharedfile(
                'pra-%s-%s/%s/%s.npz' %
                (args.model, args.dataset, args.experiments, cl))
            df = pd.read_csv(save_dir)
            df2 = pd.read_csv(os.path.join(directory, '%s.csv' % cl))
            to_save = []
            for idx, (unit, concept) in enumerate(
                    zip(df2['Unit'].loc[14:], df2['Concept'].loc[14:])):
                to_save.append((unit, concept))
            topK_all_class.append(to_save)
        df = df.rename(columns={'Unnamed: 0': 'Class', '0': 'Acc_dropped'})
        df['Unit/Concept'] = topK_all_class
        df['Class'] = classlabels
        df.to_csv(save_dir)
        print("HELLO")
        # df = df.rename(columns={"Unnamed: 0": "Concept"})
        # df.to_csv(os.path.join(directory, '%s.csv' % cl))

    # coba()

    # sortAcc()
    #Getting the baseline accuracy.
    baseline_acc_dir = os.path.join(
        os.getcwd(), 'results/shared',
        'pra-%s-%s/baseline_acc.npz' % (args.model, args.dataset))
    if (os.path.exists(baseline_acc_dir)):
        baseline_ = np.load(baseline_acc_dir)
        baseline_acc, baseline_acc_per_class = baseline_['acc'], baseline_[
            'acc_per_class']
    else:
        pbar.descnext('baseline_pra')
        baseline_acc, baseline_acc_per_class = my_test_perclass(
            model,
            dataset,
            ablated_units=None,
            cachefile=sharedfile('pra-%s-%s/%s_acc.npz' %
                                 (args.model, args.dataset, args.experiments)))
        cachefile = sharedfile('pra-%s-%s/%s_acc.npz' %
                               (args.model, args.dataset, args.experiments))
        np.savez(cachefile,
                 acc=baseline_acc,
                 acc_per_class=baseline_acc_per_class)
        baseline_acc_per_class = np.expand_dims(baseline_acc_per_class, axis=0)
        pd.DataFrame(baseline_acc_per_class,
                     index=['Baseline'],
                     columns=classlabels).to_csv("base_line.csv")

    #Now erase each unit, one at a time, and retest accuracy.
    cached_results_dir = os.path.join(
        os.getcwd(), 'results/shared',
        'pra-%s-%s/%s_acc.npz' % (args.model, args.dataset, args.experiments))

    cachefile = sharedfile('pra-%s-%s/%s_acc.npz' %
                           (args.model, args.dataset, args.experiments))
    all_units = []

    if (args.experiments == "topK"):
        zeroingTopK()
    elif (args.experiments == "bottomK"):
        zeroingBottomK()

    if (args.extract_data):
        baseline_ = {
            'acc': baseline_acc,
            'acc_per_class': baseline_acc_per_class
        }
        # npzToCSV(args.experiments, columns=classlabels, baseline_=baseline_, export_csv=False)
        # sortUnitByClass(baseline_, args.experiments, classlabels)
    else:
        if (args.experiments == 'exp1'):
            df = pd.read_csv(os.path.join(datas_dir, 'Sensible units.csv'))
            if (os.path.exists(cached_results_dir)):
                # IF THE RESULT ALREADY EXISTS
                acc_per_class_list = np.load(
                    cached_results_dir)['acc_per_class']
                acc_list = np.load(cached_results_dir)['acc']
            else:
                #Remove units one at a time.
                units_to_remove, concepts = df['Unit'], df['Concepts']
                for idx, (units,
                          concept) in enumerate(zip(units_to_remove,
                                                    concepts)):
                    units = units.split(',')
                    units = [(int(u), concept) for u in units]
                    all_units.extend(units)
                acc_per_class_list = np.zeros(
                    [len(all_units), len(classlabels)])
                acc_list = np.zeros(len(all_units))
                for idx, (unit, c) in enumerate(all_units):
                    accuracy, acc_per_class = my_test_perclass(
                        model,
                        dataset,
                        layername=layername,
                        ablated_units=[unit],
                        cachefile=cachefile)
                    acc_list[idx] = accuracy
                    acc_per_class_list[idx] = acc_per_class

                np.savez(cachefile,
                         acc=acc_list,
                         acc_per_class=acc_per_class_list)

        elif (args.experiments == 'exp2'):
            df = pd.read_csv(os.path.join(datas_dir, 'Sensible units.csv'))
            if (os.path.exists(cached_results_dir)):
                #IF THE RESULT ALREADY EXISTS
                acc_per_class_list = np.load(
                    cached_results_dir)['acc_per_class']
                acc_list = np.load(cached_results_dir)['acc']
            else:
                #Remove multiple units at a time
                units_to_remove, concepts = df['Unit'], df['Concepts']
                acc_per_class_list = np.zeros([num_units, len(classlabels)])
                acc_list = np.zeros(num_units)
                for idx, (units,
                          concept) in enumerate(zip(units_to_remove,
                                                    concepts)):
                    units = units.split(',')
                    units = [int(u) for u in units]
                    accuracy, acc_per_class = my_test_perclass(
                        model,
                        dataset,
                        layername=layername,
                        ablated_units=units,
                        cachefile=cachefile)
                    acc_list[idx] = accuracy
                    acc_per_class_list[idx] = acc_per_class  # in a list

                np.savez(cachefile,
                         acc=acc_list,
                         acc_per_class=acc_per_class_list)
        elif (args.experiments == 'exp3'):
            df = pd.read_csv(os.path.join(datas_dir, 'units_label.csv'))
            if (os.path.exists(cached_results_dir)):
                acc_per_class_list = np.load(
                    cached_results_dir)['acc_per_class']
                acc_list = np.load(cached_results_dir)['acc']
            else:
                # Remove multiple units at a time
                concepts = df['Concepts']
                # acc_per_class_list = np.zeros([len(concepts), len(classlabels)])
                # acc_list = np.zeros(len(concepts))

                # acc_per_class = np.zeros(len(classlabels))
                # acc_per_class = np.zeros(len(classlabels))

                process_complete = tqdm.tqdm(total=len(concepts),
                                             desc='Units Complete',
                                             position=0)
                for idx, (concept) in enumerate(concepts):
                    cachefile = sharedfile(
                        'pra-%s-%s/%s/%s.npz' %
                        (args.model, args.dataset, args.experiments,
                         "unit" + str(idx)))
                    if (not os.path.exists(cachefile)):
                        unit = idx
                        # units = [int(u) for u in units]
                        accuracy, acc_per_class = my_test_perclass(
                            model,
                            dataset,
                            layername=layername,
                            ablated_units=[unit],
                            cachefile=cachefile)
                        # acc_list[idx] = accuracy
                        # acc_per_class_list[idx] = acc_per_class  # in a list
                        np.savez(cachefile,
                                 acc=accuracy,
                                 acc_per_class=acc_per_class,
                                 concept=concept)
                    else:
                        print("Unit %s is done" % (str(idx)))
                    process_complete.update(1)

        elif (args.experiments == 'exp4'):
            with open(os.path.join(datas_dir, 'units_by_labels.json'),
                      'r') as file:
                df = json.load(file)
            acc_per_class_list = np.zeros([len(df), len(classlabels)])
            acc_list = np.zeros(len(df))
            process_complete = tqdm.tqdm(total=len(df),
                                         desc='Concepts Complete',
                                         position=0)
            for idx, (concept, units) in enumerate(df.items()):
                cachefile = sharedfile(
                    'pra-%s-%s/%s/%s.npz' %
                    (args.model, args.dataset, args.experiments, concept))
                if (not os.path.exists(cachefile)):
                    #i.e (concept, units) = ('arm', [42,260,462,464])
                    accuracy, acc_per_class = my_test_perclass(
                        model,
                        dataset,
                        layername=layername,
                        ablated_units=units,
                        cachefile=cachefile)
                    acc_list[idx] = accuracy
                    acc_per_class_list[idx] = acc_per_class
                    np.savez(cachefile,
                             acc=acc_list,
                             acc_per_class=acc_per_class_list)
                else:
                    print("Concept : %s is done" % (concept))
                process_complete.update(1)
Example #11
def main():
    args = parseargs()

    model = setting.load_classifier(args.model)
    model = nethook.InstrumentedModel(model).cuda().eval()
    layername = args.layer
    model.retain_layer(layername)
    dataset = setting.load_dataset(args.dataset, crop_size=224)
    train_dataset = setting.load_dataset(args.dataset,
                                         crop_size=224,
                                         split='train')
    sample_size = len(dataset)

    # Probe layer to get sizes
    model(dataset[0][0][None].cuda())
    num_units = model.retained_layer(layername).shape[1]
    classlabels = dataset.classes

    # Measure baseline classification accuracy on val set, and cache.
    pbar.descnext('baseline_pra')
    baseline_precision, baseline_recall, baseline_accuracy, baseline_ba = (
        test_perclass_pra(model,
                          dataset,
                          cachefile=sharedfile('pra-%s-%s/pra_baseline.npz' %
                                               (args.model, args.dataset))))
    pbar.print('baseline acc', baseline_ba.mean().item())

    # Now erase each unit, one at a time, and retest accuracy.
    unit_list = random.sample(list(range(num_units)), num_units)
    val_single_unit_ablation_ba = torch.zeros(num_units, len(classlabels))
    for unit in pbar(unit_list):
        pbar.descnext('test unit %d' % unit)
        # Get binary accuracy of the model after ablating the unit.
        _, _, _, ablation_ba = test_perclass_pra(
            model,
            dataset,
            layername=layername,
            ablated_units=[unit],
            cachefile=sharedfile('pra-%s-%s/pra_ablate_unit_%d.npz' %
                                 (args.model, args.dataset, unit)))
        val_single_unit_ablation_ba[unit] = ablation_ba

    # For the purpose of ranking units by importance to a class, we
    # measure using the training set (to avoid training unit ordering
    # on the test set).
    sample_size = None
    # Measure baseline classification accuracy, and cache.
    pbar.descnext('train_baseline_pra')
    baseline_precision, baseline_recall, baseline_accuracy, baseline_ba = (
        test_perclass_pra(
            model,
            train_dataset,
            sample_size=sample_size,
            cachefile=sharedfile('ttv-pra-%s-%s/pra_train_baseline.npz' %
                                 (args.model, args.dataset))))
    pbar.print('baseline acc', baseline_ba.mean().item())

    # Measure accuracy on the val set.
    pbar.descnext('val_baseline_pra')
    _, _, _, val_baseline_ba = (test_perclass_pra(
        model,
        dataset,
        cachefile=sharedfile('ttv-pra-%s-%s/pra_val_baseline.npz' %
                             (args.model, args.dataset))))
    pbar.print('val baseline acc', val_baseline_ba.mean().item())

    # Do in shuffled order to allow multiprocessing.
    single_unit_ablation_ba = torch.zeros(num_units, len(classlabels))
    for unit in pbar(unit_list):
        pbar.descnext('test unit %d' % unit)
        _, _, _, ablation_ba = test_perclass_pra(
            model,
            train_dataset,
            layername=layername,
            ablated_units=[unit],
            sample_size=sample_size,
            cachefile=sharedfile('ttv-pra-%s-%s/pra_train_ablate_unit_%d.npz' %
                                 (args.model, args.dataset, unit)))
        single_unit_ablation_ba[unit] = ablation_ba

    # Now for every class, remove a set of the N most-important
    # and N least-important units for that class, and measure accuracy.
    for classnum in pbar(
            random.sample(range(len(classlabels)), len(classlabels))):
        # For a few classes, let's chart the whole range of ablations.
        if classnum in [100, 169, 351, 304]:
            num_best_list = range(1, num_units)
        else:
            num_best_list = [1, 2, 3, 4, 5, 20, 64, 128, 256]
        pbar.descnext('numbest')
        for num_best in pbar(random.sample(num_best_list, len(num_best_list))):
            num_worst = num_units - num_best
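            # Ascending sort of post-ablation accuracy for this class: the first
            # num_best units are those whose removal hurts the class the most.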
            unitlist = single_unit_ablation_ba[:,
                                               classnum].sort(0)[1][:num_best]
            _, _, _, testba = test_perclass_pra(
                model,
                dataset,
                layername=layername,
                ablated_units=unitlist,
                cachefile=sharedfile(
                    'ttv-pra-%s-%s/pra_val_ablate_classunits_%s_ba_%d.npz' %
                    (args.model, args.dataset, classlabels[classnum],
                     len(unitlist))))
            unitlist = (
                single_unit_ablation_ba[:, classnum].sort(0)[1][-num_worst:])
            _, _, _, testba2 = test_perclass_pra(
                model,
                dataset,
                layername=layername,
                ablated_units=unitlist,
                cachefile=sharedfile(
                    'ttv-pra-%s-%s/pra_val_ablate_classunits_%s_worstba_%d.npz'
                    % (args.model, args.dataset, classlabels[classnum],
                       len(unitlist))))
            pbar.print('%s: best %d %.3f vs worst N %.3f' %
                       (classlabels[classnum], num_best,
                        testba[classnum] - val_baseline_ba[classnum],
                        testba2[classnum] - val_baseline_ba[classnum]))