def main():
    parser = argparse.ArgumentParser(description='Net dissect utility',
                                     prog='python -m netdissect.fsd')
    parser.add_argument('true_dir')
    parser.add_argument('gen_dir')
    parser.add_argument('--size', type=int, default=10000)
    parser.add_argument('--cachedir', default=None)
    parser.add_argument('--histout', default=None)
    parser.add_argument('--maxscale', type=float, default=50.0)
    parser.add_argument('--labelcount', type=int, default=30)
    parser.add_argument('--dpi', type=float, default=100)
    if len(sys.argv) == 1:
        parser.print_usage(sys.stderr)
        sys.exit(1)
    args = parser.parse_args()
    pbar.verbose(True)
    true_dir, gen_dir = args.true_dir, args.gen_dir
    # Use a different sampling seed when a directory is compared with itself.
    seed1, seed2 = (1, 1) if true_dir != gen_dir else (1, 2)
    true_tally, gen_tally = [
        cached_tally_directory(d,
                               size=args.size,
                               cachedir=args.cachedir,
                               seed=seed)
        for d, seed in [(true_dir, seed1), (gen_dir, seed2)]
    ]
    fsd, meandiff, covdiff = frechet_distance.sample_frechet_distance(
        true_tally * 100, gen_tally * 100, return_components=True)
    print('fsd: %f; meandiff: %f; covdiff: %f' % (fsd, meandiff, covdiff))
    if args.histout is not None:
        diff_figure(true_tally,
                    gen_tally,
                    labelcount=args.labelcount,
                    maxscale=args.maxscale,
                    dpi=args.dpi).savefig(args.histout)
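
# A minimal sketch (not part of netdissect) of the Frechet statistic that
# sample_frechet_distance above is presumed to compute: fit a Gaussian to each
# set of scaled tallies and compare their means and covariances
# (fsd = meandiff + covdiff).
import numpy
import scipy.linalg

def frechet_sketch(x, y):
    # x, y: arrays of shape (num_samples, num_features), e.g. scaled tallies.
    mu_x, mu_y = x.mean(axis=0), y.mean(axis=0)
    cov_x = numpy.cov(x, rowvar=False)
    cov_y = numpy.cov(y, rowvar=False)
    meandiff = ((mu_x - mu_y) ** 2).sum()
    covmean = scipy.linalg.sqrtm(cov_x.dot(cov_y))
    if numpy.iscomplexobj(covmean):
        covmean = covmean.real  # sqrtm can return tiny imaginary parts
    covdiff = numpy.trace(cov_x + cov_y - 2.0 * covmean)
    return meandiff + covdiff, meandiff, covdiff
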
def test_dissection():
    pbar.verbose(True)
    from torchvision.models import alexnet
    from torchvision import transforms
    # Load an alexnet and instrument its five conv layers.
    model = InstrumentedModel(alexnet(pretrained=True))
    model.eval()
    model.retain_layers([
        ('features.0', 'conv1'),
        ('features.3', 'conv2'),
        ('features.6', 'conv3'),
        ('features.8', 'conv4'),
        ('features.10', 'conv5')])
    # load broden dataset
    bds = BrodenDataset('datasets/broden',
            transform=transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize(IMAGE_MEAN, IMAGE_STDEV)]),
            size=100)
    # run dissect
    dissect('dissect/test', model, bds,
            examples_per_unit=10)
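
# A hedged sketch of what InstrumentedModel.retain_layers is doing under the
# hood: register forward hooks on the named submodules and keep the latest
# activations under the given aliases.  It uses only plain torch, so no exact
# netdissect API is assumed.
import torch

def retain_activations(net, layer_aliases):
    # layer_aliases: list of (dotted_module_name, alias) pairs, as above.
    retained = {}
    modules = dict(net.named_modules())
    for name, alias in layer_aliases:
        def hook(module, inputs, output, alias=alias):
            retained[alias] = output.detach()
        modules[name].register_forward_hook(hook)
    return retained

# Usage (illustrative): acts = retain_activations(alexnet(pretrained=True),
# [('features.10', 'conv5')]); after a forward pass, acts['conv5'] holds the
# most recent conv5 activations.
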
def main():
    # Command-line settings
    def strpair(arg):
        p = tuple(arg.split(':'))
        if len(p) == 1:
            p = p + p
        return p
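    # Illustrative behavior of strpair (hedged example, not in the original):
    #   strpair('features.6:conv3') -> ('features.6', 'conv3')
    #   strpair('layer4')           -> ('layer4', 'layer4')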

    parser = argparse.ArgumentParser(
        description='Ablation eval',
        epilog=textwrap.dedent(help_epilog),
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('--model',
                        type=str,
                        default=None,
                        help='constructor for the model to test')
    parser.add_argument('--pthfile',
                        type=str,
                        default=None,
                        help='filename of .pth file for the model')
    parser.add_argument('--outdir',
                        type=str,
                        required=True,
                        help='directory for dissection output')
    parser.add_argument('--layers',
                        type=strpair,
                        nargs='+',
                        help='space-separated list of layer names to edit' +
                        ', in the form layername[:reportedname]')
    parser.add_argument('--classes',
                        type=str,
                        nargs='+',
                        help='space-separated list of class names to ablate')
    parser.add_argument('--metric',
                        type=str,
                        default='iou',
                        help='ordering metric for selecting units')
    parser.add_argument('--unitcount',
                        type=int,
                        default=30,
                        help='number of units to ablate')
    parser.add_argument('--segmenter',
                        type=str,
                        help='directory containing segmentation dataset')
    parser.add_argument('--netname',
                        type=str,
                        default=None,
                        help='name for network in generated reports')
    parser.add_argument('--batch_size',
                        type=int,
                        default=5,
                        help='batch size for forward pass')
    parser.add_argument('--size',
                        type=int,
                        default=200,
                        help='number of images to test')
    parser.add_argument('--no-cuda',
                        action='store_true',
                        default=False,
                        help='disables CUDA usage')
    parser.add_argument('--quiet',
                        action='store_true',
                        default=False,
                        help='silences console output')
    if len(sys.argv) == 1:
        parser.print_usage(sys.stderr)
        sys.exit(1)
    args = parser.parse_args()

    # Set up console output
    pbar.verbose(not args.quiet)

    # Speed up pytorch
    torch.backends.cudnn.benchmark = True

    # Set up CUDA
    args.cuda = not args.no_cuda and torch.cuda.is_available()
    if args.cuda:
        torch.backends.cudnn.benchmark = True

    # Take defaults for model constructor etc from dissect.json settings.
    with open(os.path.join(args.outdir, 'dissect.json')) as f:
        dissection = EasyDict(json.load(f))
    if args.model is None:
        args.model = dissection.settings.model
    if args.pthfile is None:
        args.pthfile = dissection.settings.pthfile
    if args.segmenter is None:
        args.segmenter = dissection.settings.segmenter

    # Instantiate generator
    model = create_instrumented_model(args, gen=True, edit=True)
    if model is None:
        print('No model specified')
        sys.exit(1)

    # Instantiate model
    device = next(model.parameters()).device
    input_shape = model.input_shape

    # 4d input if convolutional, 2d input if first layer is linear.
    raw_sample = standard_z_sample(args.size, input_shape[1],
                                   seed=2).view((args.size, ) +
                                                input_shape[1:])
    dataset = TensorDataset(raw_sample)

    # Create the segmenter
    segmenter = autoimport_eval(args.segmenter)

    # Now do the actual work.
    labelnames, catnames = segmenter.get_label_and_category_names(dataset)
    label_category = [
        catnames.index(c) if c in catnames else 0 for l, c in labelnames
    ]
    labelnum_from_name = {n[0]: i for i, n in enumerate(labelnames)}

    segloader = torch.utils.data.DataLoader(dataset,
                                            batch_size=args.batch_size,
                                            num_workers=10,
                                            pin_memory=(device.type == 'cuda'))

    # Index the dissection layers by layer name.
    dissect_layer = {lrec.layer: lrec for lrec in dissection.layers}

    # First, collect a baseline
    for l in model.ablation:
        model.ablation[l] = None

    # For each sort-order, do an ablation
    for classname in pbar(args.classes):
        pbar.post(c=classname)
        for layername in pbar(model.ablation):
            pbar.post(l=layername)
            rankname = '%s-%s' % (classname, args.metric)
            classnum = labelnum_from_name[classname]
            try:
                ranking = next(r for r in dissect_layer[layername].rankings
                               if r.name == rankname)
            except StopIteration:
                print('%s not found' % rankname)
                sys.exit(1)
            ordering = numpy.argsort(ranking.score)
            # Check if already done
            ablationdir = os.path.join(args.outdir, layername, 'pixablation')
            if os.path.isfile(os.path.join(ablationdir, '%s.json' % rankname)):
                with open(os.path.join(ablationdir,
                                       '%s.json' % rankname)) as f:
                    data = EasyDict(json.load(f))
                # If the unit ordering is not the same, something is wrong
                if not all(a == o
                           for a, o in zip(data.ablation_units, ordering)):
                    continue
                if len(data.ablation_effects) >= args.unitcount:
                    continue  # file already done.
                measurements = data.ablation_effects
            measurements = measure_ablation(segmenter, segloader, model,
                                            classnum, layername,
                                            ordering[:args.unitcount])
            measurements = measurements.cpu().numpy().tolist()
            os.makedirs(ablationdir, exist_ok=True)
            with open(os.path.join(ablationdir, '%s.json' % rankname),
                      'w') as f:
                json.dump(
                    dict(classname=classname,
                         classnum=classnum,
                         baseline=measurements[0],
                         layer=layername,
                         metric=args.metric,
                         ablation_units=ordering.tolist(),
                         ablation_effects=measurements[1:]), f)
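
# A hedged sketch (not part of the original) of reading back one of the JSON
# files written above; the field names follow the json.dump call, and dividing
# by the baseline is just one plausible way to normalize the effects.
def summarize_ablation_json(path, k=None):
    import json
    with open(path) as f:
        data = json.load(f)
    effects = data['ablation_effects'] if k is None else data['ablation_effects'][:k]
    baseline = data['baseline']
    # Fraction of the baseline segmentation score remaining after ablating
    # the top-1, top-2, ... units in the stored ordering.
    return [e / baseline if baseline else 0.0 for e in effects]
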
def main():
    # Command-line settings
    def strpair(arg):
        p = tuple(arg.split(':'))
        if len(p) == 1:
            p = p + p
        return p
    def intpair(arg):
        p = arg.split(',')
        if len(p) == 1:
            p = p + p
        return tuple(int(v) for v in p)
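    # Illustrative behavior of these helpers (hedged examples, not in the original):
    #   strpair('layer4:features') -> ('layer4', 'features')
    #   intpair('227,227')         -> (227, 227)
    #   intpair('64')              -> (64, 64)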

    parser = argparse.ArgumentParser(description='Net dissect utility',
            prog='python -m netdissect',
            epilog=textwrap.dedent(help_epilog),
            formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('--model', type=str, default=None,
                        help='constructor for the model to test')
    parser.add_argument('--pthfile', type=str, default=None,
                        help='filename of .pth file for the model')
    parser.add_argument('--unstrict', action='store_true', default=False,
                        help='ignore unexpected pth parameters')
    parser.add_argument('--modelkey', type=str, default=None,
                        help='key within pthfile containing state_dict')
    parser.add_argument('--submodule', type=str, default=None,
                        help='submodule to load from pthfile state dict')
    parser.add_argument('--outdir', type=str, default='dissect',
                        help='directory for dissection output')
    parser.add_argument('--layers', type=strpair, nargs='+',
                        help='space-separated list of layer names to dissect' +
                        ', in the form layername[:reportedname]')
    parser.add_argument('--segments', type=str, default='datasets/broden',
                        help='directory containing segmentation dataset')
    parser.add_argument('--segmenter', type=str, default=None,
                        help='constructor for a segmenter class')
    parser.add_argument('--normalizer', type=str, default=None,
                        help='Normalize rgb with imagenet, zc, or pt ranges')
    parser.add_argument('--download', action='store_true', default=False,
                        help='downloads Broden dataset if needed')
    parser.add_argument('--imagedir', type=str, default=None,
                        help='directory containing image-only dataset')
    parser.add_argument('--imgsize', type=intpair, default=(227, 227),
                        help='input image size to use')
    parser.add_argument('--netname', type=str, default=None,
                        help='name for network in generated reports')
    parser.add_argument('--meta', type=str, nargs='+',
                        help='json files of metadata to add to report')
    parser.add_argument('--merge', type=str,
                        help='json file of unit data to merge in report')
    parser.add_argument('--examples', type=int, default=20,
                        help='number of image examples per unit')
    parser.add_argument('--size', type=int, default=10000,
                        help='dataset subset size to use')
    parser.add_argument('--batch_size', type=int, default=100,
                        help='batch size for forward pass')
    parser.add_argument('--num_workers', type=int, default=24,
                        help='number of DataLoader workers')
    parser.add_argument('--quantile_threshold', type=strfloat, default=None,
                        choices=[FloatRange(0.0, 1.0), 'iqr'],
                        help='quantile to use for masks')
    parser.add_argument('--whiten', default=None,
                        help='set to pca to whiten units')
    parser.add_argument('--no-labels', action='store_true', default=False,
                        help='disables labeling of units')
    parser.add_argument('--maxiou', action='store_true', default=False,
                        help='enables maxiou calculation')
    parser.add_argument('--covariance', action='store_true', default=False,
                        help='enables covariance calculation')
    parser.add_argument('--rank_all_labels', action='store_true', default=False,
                        help='include low-information labels in rankings')
    parser.add_argument('--no-images', action='store_true', default=False,
                        help='disables generation of unit images')
    parser.add_argument('--no-report', action='store_true', default=False,
                        help='disables generation of the report summary')
    parser.add_argument('--no-cuda', action='store_true', default=False,
                        help='disables CUDA usage')
    parser.add_argument('--gen', action='store_true', default=False,
                        help='test a generator model (e.g., a GAN)')
    parser.add_argument('--gan', action='store_true', default=False,
                        help='synonym for --gen')
    parser.add_argument('--perturbation', default=None,
                        help='filename of perturbation attack to apply')
    parser.add_argument('--add_scale_offset', action='store_true', default=None,
                        help='offsets masks according to stride and padding')
    parser.add_argument('--quiet', action='store_true', default=False,
                        help='silences console output')
    if len(sys.argv) == 1:
        parser.print_usage(sys.stderr)
        sys.exit(1)
    args = parser.parse_args()
    args.images = not args.no_images
    args.report = not args.no_report
    args.labels = not args.no_labels
    if args.gan:
        args.gen = args.gan

    # Set up console output
    pbar.verbose(not args.quiet)

    # Exit right away if job is already done or being done.
    if args.outdir is not None:
        exit_if_job_done(args.outdir)

    # Speed up pytorch
    torch.backends.cudnn.benchmark = True

    # Special case: download flag without model to test.
    if args.model is None and args.download:
        from netdissect.broden import ensure_broden_downloaded
        for resolution in [224, 227, 384]:
            ensure_broden_downloaded(args.segments, resolution, 1)
        from netdissect.segmenter import ensure_upp_segmenter_downloaded
        ensure_upp_segmenter_downloaded('datasets/segmodel')
        sys.exit(0)

    # Help if broden is not present
    if not args.gen and not args.imagedir and not os.path.isdir(args.segments):
        pbar.print('Segmentation dataset not found at %s.' % args.segments)
        pbar.print('Specify dataset directory using --segments [DIR]')
        pbar.print('To download Broden, run: netdissect --download')
        sys.exit(1)

    # Default segmenter class
    if args.gen and args.segmenter is None:
        args.segmenter = ("netdissect.segmenter.UnifiedParsingSegmenter(" +
                "segsizes=[256], segdiv='quad')")

    # Default threshold
    if args.quantile_threshold is None:
        if args.gen:
            args.quantile_threshold = 'iqr'
        else:
            args.quantile_threshold = 0.005

    # Set up CUDA
    args.cuda = not args.no_cuda and torch.cuda.is_available()
    if args.cuda:
        torch.backends.cudnn.benchmark = True

    # Construct the network with specified layers instrumented
    if args.model is None:
        pbar.print('No model specified')
        sys.exit(1)
    model = create_instrumented_model(args)

    # Update any metadata from files, if any
    meta = getattr(model, 'meta', {})
    if args.meta:
        for mfilename in args.meta:
            with open(mfilename) as f:
                meta.update(json.load(f))

    # Load any merge data from files
    mergedata = None
    if args.merge:
        with open(args.merge) as f:
            mergedata = json.load(f)

    # Set up the output directory, verify write access
    if args.outdir is None:
        args.outdir = os.path.join('dissect', type(model).__name__)
        exit_if_job_done(args.outdir)
        pbar.print('Writing output into %s.' % args.outdir)
    os.makedirs(args.outdir, exist_ok=True)
    train_dataset = None

    if not args.gen:
        # Load dataset for classifier case.
        # Load perturbation
        perturbation = (numpy.load(args.perturbation)
                        if args.perturbation else None)
        segrunner = None

        # Load broden dataset
        if args.imagedir is not None:
            dataset = try_to_load_images(args.imagedir, args.imgsize,
                    perturbation, args.size)
            segrunner = ImageOnlySegRunner(dataset)
        else:
            dataset = try_to_load_broden(args.segments, args.imgsize, 1,
                perturbation, args.download, args.size,
                normalizer_named(args.normalizer))
        if dataset is None:
            dataset = try_to_load_multiseg(args.segments, args.imgsize,
                    perturbation, args.size)
        if dataset is None:
            pbar.print('No segmentation dataset found in %s.' % args.segments)
            pbar.print('Use --download to download Broden.')
            sys.exit(1)
    else:
        # For segmenter case the dataset is just a random z
        dataset = z_dataset_for_model(model, args.size)
        train_dataset = z_dataset_for_model(model, args.size, seed=2)
        segrunner = GeneratorSegRunner(autoimport_eval(args.segmenter))

    # Run dissect
    dissect(args.outdir, model, dataset,
            train_dataset=train_dataset,
            segrunner=segrunner,
            examples_per_unit=args.examples,
            netname=args.netname,
            quantile_threshold=args.quantile_threshold,
            meta=meta,
            merge=mergedata,
            pca_units=(args.whiten == 'pca'),
            make_images=args.images,
            make_labels=args.labels,
            make_maxiou=args.maxiou,
            make_covariance=args.covariance,
            make_report=args.report,
            make_row_images=args.images,
            make_single_images=True,
            rank_all_labels=args.rank_all_labels,
            batch_size=args.batch_size,
            num_workers=args.num_workers,
            settings=vars(args))

    # Mark the directory so that it's not done again.
    mark_job_done(args.outdir)
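
# Typical invocations of this entry point (hedged examples; the constructor
# strings and checkpoint paths are placeholders to adapt):
#   python -m netdissect --model 'torchvision.models.alexnet(pretrained=True)' \
#       --layers features.6:conv3 features.10:conv5 --size 1000
#   python -m netdissect --gan --model 'mymodel.load_generator()' \
#       --pthfile generator.pth --layers layer4 --outdir dissect/gen
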
def run_command(args):
    pbar.verbose(True)
    classname = args.classname  # 'door'
    layer = args.layer  # 'layer4'
    num_eval_units = 20

    assert os.path.isfile(os.path.join(args.outdir, 'dissect.json')), (
        '%s should be a dissection directory' % args.outdir)

    if args.variant is None:
        args.variant = 'ace'

    if args.l2_lambda != 0.005:
        args.variant = '%s_reg%g' % (args.variant, args.l2_lambda)

    cachedir = os.path.join(args.outdir, safe_dir_name(layer), args.variant,
                            classname)

    if pidfile_taken(os.path.join(cachedir, 'lock.pid'), True):
        sys.exit(0)

    # Take defaults for model constructor etc from dissect.json settings.
    with open(os.path.join(args.outdir, 'dissect.json')) as f:
        dissection = EasyDict(json.load(f))
    if args.model is None:
        args.model = dissection.settings.model
    if args.pthfile is None:
        args.pthfile = dissection.settings.pthfile
    if args.segmenter is None:
        args.segmenter = dissection.settings.segmenter
    # Default segmenter class
    if args.segmenter is None:
        args.segmenter = ("netdissect.segmenter.UnifiedParsingSegmenter(" +
                          "segsizes=[256], segdiv='quad')")

    if (not args.no_cache and os.path.isfile(
            os.path.join(cachedir, 'snapshots', 'epoch-%d.npy' %
                         (args.train_epochs - 1)))
            and os.path.isfile(os.path.join(cachedir, 'report.json'))):
        print('%s already done' % cachedir)
        sys.exit(0)

    os.makedirs(cachedir, exist_ok=True)

    # Instantiate generator
    model = create_instrumented_model(args,
                                      gen=True,
                                      edit=True,
                                      layers=[args.layer])
    if model is None:
        print('No model specified')
        sys.exit(1)
    # Instantiate segmenter
    segmenter = autoimport_eval(args.segmenter)
    labelnames, catname = segmenter.get_label_and_category_names()
    classnum = [i for i, (n, c) in enumerate(labelnames) if n == classname][0]
    num_classes = len(labelnames)
    with open(os.path.join(cachedir, 'labelnames.json'), 'w') as f:
        json.dump(labelnames, f, indent=1)

    # Sample sets for training.
    full_sample = netdissect.zdataset.z_sample_for_model(model,
                                                         args.search_size,
                                                         seed=10)
    second_sample = netdissect.zdataset.z_sample_for_model(model,
                                                           args.search_size,
                                                           seed=11)
    # Load any cached data.
    cache_filename = os.path.join(cachedir, 'corpus.npz')
    corpus = EasyDict()
    try:
        if not args.no_cache:
            corpus = EasyDict({
                k: torch.from_numpy(v)
                for k, v in numpy.load(cache_filename).items()
            })
    except (OSError, ValueError):
        # A missing or unreadable cache just means we start from scratch.
        pass

    # The steps for the computation.
    compute_present_locations(args, corpus, cache_filename, model, segmenter,
                              classnum, full_sample)
    compute_mean_present_features(args, corpus, cache_filename, model)
    compute_feature_quantiles(args, corpus, cache_filename, model, full_sample)
    compute_candidate_locations(args, corpus, cache_filename, model, segmenter,
                                classnum, second_sample)
    # visualize_training_locations(args, corpus, cachedir, model)
    init_ablation = initial_ablation(args, args.outdir)
    scores = train_ablation(args, corpus, cache_filename, model, segmenter,
                            classnum, init_ablation)
    summarize_scores(args, corpus, cachedir, layer, classname, args.variant,
                     scores)
    if args.variant == 'ace':
        add_ace_ranking_to_dissection(args.outdir, layer, classname, scores)
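
# A hedged sketch of driving run_command directly; the attribute names mirror
# the args fields read above, the values are placeholders, and the helper
# functions (create_instrumented_model and friends) may expect further fields.
import argparse

def example_run():
    args = argparse.Namespace(
        outdir='dissect/example',      # hypothetical dissection directory
        classname='door',
        layer='layer4',
        variant=None,
        l2_lambda=0.005,
        no_cache=False,
        train_epochs=10,
        search_size=10000,
        model=None, pthfile=None, segmenter=None)
    run_command(args)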