Exemple #1
0
def main():
    # Training settings
    def strpair(arg):
        p = tuple(arg.split(':'))
        if len(p) == 1:
            p = p + p
        return p

    def intpair(arg):
        p = arg.split(',')
        if len(p) == 1:
            p = p + p
        return tuple(int(v) for v in p)

    parser = argparse.ArgumentParser(
        description='Net dissect utility',
        prog='python -m netdissect',
        epilog=textwrap.dedent(help_epilog),
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('--model',
                        type=str,
                        default=None,
                        help='constructor for the model to test')
    parser.add_argument('--pthfile',
                        type=str,
                        default=None,
                        help='filename of .pth file for the model')
    parser.add_argument('--unstrict',
                        action='store_true',
                        default=False,
                        help='ignore unexpected pth parameters')
    parser.add_argument('--submodule',
                        type=str,
                        default=None,
                        help='submodule to load from pthfile')
    parser.add_argument('--outdir',
                        type=str,
                        default='dissect',
                        help='directory for dissection output')
    parser.add_argument('--layers',
                        type=strpair,
                        nargs='+',
                        help='space-separated list of layer names to dissect' +
                        ', in the form layername[:reportedname]')
    parser.add_argument('--segments',
                        type=str,
                        default='dataset/broden',
                        help='directory containing segmentation dataset')
    parser.add_argument('--segmenter',
                        type=str,
                        default=None,
                        help='constructor for asegmenter class')
    parser.add_argument('--download',
                        action='store_true',
                        default=False,
                        help='downloads Broden dataset if needed')
    parser.add_argument('--imagedir',
                        type=str,
                        default=None,
                        help='directory containing image-only dataset')
    parser.add_argument('--imgsize',
                        type=intpair,
                        default=(227, 227),
                        help='input image size to use')
    parser.add_argument('--netname',
                        type=str,
                        default=None,
                        help='name for network in generated reports')
    parser.add_argument('--meta',
                        type=str,
                        nargs='+',
                        help='json files of metadata to add to report')
    parser.add_argument('--merge',
                        type=str,
                        help='json file of unit data to merge in report')
    parser.add_argument('--examples',
                        type=int,
                        default=20,
                        help='number of image examples per unit')
    parser.add_argument('--size',
                        type=int,
                        default=10000,
                        help='dataset subset size to use')
    parser.add_argument('--batch_size',
                        type=int,
                        default=100,
                        help='batch size for forward pass')
    parser.add_argument('--num_workers',
                        type=int,
                        default=24,
                        help='number of DataLoader workers')
    parser.add_argument('--quantile_threshold',
                        type=strfloat,
                        default=None,
                        choices=[FloatRange(0.0, 1.0), 'iqr'],
                        help='quantile to use for masks')
    parser.add_argument('--no-labels',
                        action='store_true',
                        default=False,
                        help='disables labeling of units')
    parser.add_argument('--maxiou',
                        action='store_true',
                        default=False,
                        help='enables maxiou calculation')
    parser.add_argument('--covariance',
                        action='store_true',
                        default=False,
                        help='enables covariance calculation')
    parser.add_argument('--rank_all_labels',
                        action='store_true',
                        default=False,
                        help='include low-information labels in rankings')
    parser.add_argument('--no-images',
                        action='store_true',
                        default=False,
                        help='disables generation of unit images')
    parser.add_argument('--no-report',
                        action='store_true',
                        default=False,
                        help='disables generation report summary')
    parser.add_argument('--no-cuda',
                        action='store_true',
                        default=False,
                        help='disables CUDA usage')
    parser.add_argument('--gen',
                        action='store_true',
                        default=False,
                        help='test a generator model (e.g., a GAN)')
    parser.add_argument('--gan',
                        action='store_true',
                        default=False,
                        help='synonym for --gen')
    parser.add_argument('--perturbation',
                        default=None,
                        help='filename of perturbation attack to apply')
    parser.add_argument('--add_scale_offset',
                        action='store_true',
                        default=None,
                        help='offsets masks according to stride and padding')
    parser.add_argument('--quiet',
                        action='store_true',
                        default=False,
                        help='silences console output')
    if len(sys.argv) == 1:
        parser.print_usage(sys.stderr)
        sys.exit(1)
    args = parser.parse_args()
    args.images = not args.no_images
    args.report = not args.no_report
    args.labels = not args.no_labels
    if args.gan:
        args.gen = args.gan

    # Set up console output
    verbose_progress(not args.quiet)

    # Exit right away if job is already done or being done.
    if args.outdir is not None:
        exit_if_job_done(args.outdir)

    # Speed up pytorch
    torch.backends.cudnn.benchmark = True

    # Special case: download flag without model to test.
    if args.model is None and args.download:
        from netdissect.broden import ensure_broden_downloaded
        for resolution in [224, 227, 384]:
            ensure_broden_downloaded(args.segments, resolution, 1)
        from netdissect.segmenter import ensure_upp_segmenter_downloaded
        ensure_upp_segmenter_downloaded('dataset/segmodel')
        sys.exit(0)

    # Help if broden is not present
    if not args.gen and not args.imagedir and not os.path.isdir(args.segments):
        print_progress('Segmentation dataset not found at %s.' % args.segments)
        print_progress('Specify dataset directory using --segments [DIR]')
        print_progress('To download Broden, run: netdissect --download')
        sys.exit(1)

    # Default segmenter class
    if args.gen and args.segmenter is None:
        args.segmenter = ("netdissect.segmenter.UnifiedParsingSegmenter(" +
                          "segsizes=[256], segdiv=None)")

    # Default threshold
    if args.quantile_threshold is None:
        if args.gen:
            args.quantile_threshold = 'iqr'
        else:
            args.quantile_threshold = 0.005

    # Set up CUDA
    args.cuda = not args.no_cuda and torch.cuda.is_available()
    if args.cuda:
        torch.backends.cudnn.benchmark = True

    # Construct the network with specified layers instrumented
    if args.model is None:
        print_progress('No model specified')
        sys.exit(1)
    model = create_instrumented_model(args)

    # Update any metadata from files, if any
    meta = getattr(model, 'meta', {})
    if args.meta:
        for mfilename in args.meta:
            with open(mfilename) as f:
                meta.update(json.load(f))

    # Load any merge data from files
    mergedata = None
    if args.merge:
        with open(args.merge) as f:
            mergedata = json.load(f)

    # Set up the output directory, verify write access
    if args.outdir is None:
        args.outdir = os.path.join('dissect', type(model).__name__)
        exit_if_job_done(args.outdir)
        print_progress('Writing output into %s.' % args.outdir)
    os.makedirs(args.outdir, exist_ok=True)
    train_dataset = None

    if not args.gen:
        # Load dataset for classifier case.
        # Load perturbation
        perturbation = numpy.load(
            args.perturbation) if args.perturbation else None
        segrunner = None

        # Load broden dataset
        if args.imagedir is not None:
            dataset = try_to_load_images(args.imagedir, args.imgsize,
                                         perturbation, args.size)
            segrunner = ImageOnlySegRunner(dataset)
        else:
            dataset = try_to_load_broden(args.segments, args.imgsize, 1,
                                         perturbation, args.download,
                                         args.size)
        if dataset is None:
            dataset = try_to_load_multiseg(args.segments, args.imgsize,
                                           perturbation, args.size)
        if dataset is None:
            print_progress('No segmentation dataset found in %s',
                           args.segments)
            print_progress('use --download to download Broden.')
            sys.exit(1)
    else:
        # For segmenter case the dataset is just a random z
        #dataset = z_dataset_for_model(model, args.size)
        #train_dataset = z_dataset_for_model(model, args.size, seed=2)
        dataset = datasets.ImageFolder('dataset/Adissect',
                                       transform=transforms.Compose([
                                           transforms.ToTensor(),
                                           transforms.Normalize(
                                               (0.5, 0.5, 0.5),
                                               (0.5, 0.5, 0.5))
                                       ]))
        train_dataset = dataset
        segrunner = GeneratorSegRunner(autoimport_eval(args.segmenter))
    torch.no_grad()
    # Run dissect
    dissect(args.outdir,
            model,
            dataset,
            train_dataset=train_dataset,
            segrunner=segrunner,
            examples_per_unit=args.examples,
            netname=args.netname,
            quantile_threshold=args.quantile_threshold,
            meta=meta,
            merge=mergedata,
            make_images=args.images,
            make_labels=args.labels,
            make_maxiou=args.maxiou,
            make_covariance=args.covariance,
            make_report=args.report,
            make_row_images=args.images,
            make_single_images=True,
            rank_all_labels=args.rank_all_labels,
            batch_size=args.batch_size,
            num_workers=args.num_workers,
            settings=vars(args))

    # Mark the directory so that it's not done again.
    mark_job_done(args.outdir)
Exemple #2
0
def main():
    # Training settings
    def strpair(arg):
        p = tuple(arg.split(':'))
        if len(p) == 1:
            p = p + p
        return p

    def intpair(arg):
        p = arg.split(',')
        if len(p) == 1:
            p = p + p
        return tuple(int(v) for v in p)

    parser = argparse.ArgumentParser(
        description='Net dissect utility',
        prog='python -m netdissect',
        epilog=textwrap.dedent(help_epilog),
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('--model',
                        type=str,
                        default=None,
                        help='constructor for the model to test')
    parser.add_argument('--pthfile',
                        type=str,
                        default=None,
                        help='filename of .pth file for the model')
    parser.add_argument('--outdir',
                        type=str,
                        default='dissect',
                        help='directory for dissection output')
    parser.add_argument('--layers',
                        type=strpair,
                        nargs='+',
                        help='space-separated list of layer names to dissect' +
                        ', in the form layername[:reportedname]')
    parser.add_argument('--segments',
                        type=str,
                        default='dataset/broden',
                        help='directory containing segmentation dataset')
    parser.add_argument('--download',
                        action='store_true',
                        default=False,
                        help='downloads Broden dataset if needed')
    parser.add_argument('--imgsize',
                        type=intpair,
                        default=(227, 227),
                        help='input image size to use')
    parser.add_argument('--netname',
                        type=str,
                        default=None,
                        help='name for network in generated reports')
    parser.add_argument('--meta',
                        type=str,
                        nargs='+',
                        help='json files of metadata to add to report')
    parser.add_argument('--examples',
                        type=int,
                        default=20,
                        help='number of image examples per unit')
    parser.add_argument('--size',
                        type=int,
                        default=10000,
                        help='dataset subset size to use')
    parser.add_argument('--broden_version',
                        type=int,
                        default=1,
                        help='broden version number')
    parser.add_argument('--batch_size',
                        type=int,
                        default=100,
                        help='batch size for forward pass')
    parser.add_argument('--num_workers',
                        type=int,
                        default=24,
                        help='number of DataLoader workers')
    parser.add_argument('--quantile_threshold',
                        type=float,
                        default=None,
                        help='quantile to use for masks')
    parser.add_argument('--no-labels',
                        action='store_true',
                        default=False,
                        help='disables labeling of units')
    parser.add_argument('--ablation',
                        action='store_true',
                        default=False,
                        help='enables single unit ablation of units')
    parser.add_argument('--iqr',
                        action='store_true',
                        default=False,
                        help='enables iqr calculation')
    parser.add_argument('--maxiou',
                        action='store_true',
                        default=False,
                        help='enables maxiou calculation')
    parser.add_argument('--covariance',
                        action='store_true',
                        default=False,
                        help='enables covariance calculation')
    parser.add_argument('--no-images',
                        action='store_true',
                        default=False,
                        help='disables generation of unit images')
    parser.add_argument('--single-images',
                        action='store_true',
                        default=False,
                        help='generates single images also')
    parser.add_argument('--no-report',
                        action='store_true',
                        default=False,
                        help='disables generation report summary')
    parser.add_argument('--no-cuda',
                        action='store_true',
                        default=False,
                        help='disables CUDA usage')
    parser.add_argument('--gan',
                        type=str,
                        default=None,
                        help='netdissect.GanImageSegmenter() to probe a GAN')
    parser.add_argument('--perturbation',
                        default=None,
                        help='filename of perturbation attack to apply')
    parser.add_argument('--add_scale_offset',
                        action='store_true',
                        default=None,
                        help='offsets masks according to stride and padding')
    parser.add_argument('--quiet',
                        action='store_true',
                        default=False,
                        help='silences console output')
    if len(sys.argv) == 1:
        parser.print_usage(sys.stderr)
        sys.exit(1)
    args = parser.parse_args()
    args.images = not args.no_images
    args.report = not args.no_report
    args.labels = not args.no_labels

    # Set up console output
    verbose_progress(not args.quiet)

    # Speed up pytorch
    torch.backends.cudnn.benchmark = True

    # Special case: download flag without model to test.
    if args.model is None and args.download:
        from netdissect.broden import ensure_broden_downloaded
        for resolution in [224, 227, 384]:
            ensure_broden_downloaded(args.segments, resolution,
                                     args.broden_version)
        sys.exit(0)

    # Help if broden is not present
    if not os.path.isdir(args.segments):
        print_progress('Segmentation dataset not found at %s.' % args.segments)
        print_progress('Specify dataset directory using --segments [DIR]')
        print_progress('To download Broden, run: netdissect --download')
        sys.exit(1)

    # Default threshold
    if args.quantile_threshold is None:
        if args.gan:
            args.quantile_threshold = 0.01
        else:
            args.quantile_threshold = 0.005

    # Construct the network
    if args.model is None:
        print_progress('No model specified')
        sys.exit(1)

    # Set up CUDA
    args.cuda = not args.no_cuda and torch.cuda.is_available()
    if args.cuda:
        torch.backends.cudnn.benchmark = True

    model = autoimport_eval(args.model)
    # Unwrap any DataParallel-wrapped model
    if isinstance(model, torch.nn.DataParallel):
        model = next(model.children())

    # Default add_scale_offset only for AlexNet-looking models.
    if args.add_scale_offset is None and not args.gan:
        args.add_scale_offset = ('Alex' in model.__class__.__name__)

    # Load its state dict
    meta = {}
    if args.pthfile is None:
        print_progress('Dissecting model without pth file.')
    else:
        data = torch.load(args.pthfile)
        if 'state_dict' in data:
            meta = {}
            for key in data:
                if isinstance(data[key], numbers.Number):
                    meta[key] = data[key]
            data = data['state_dict']
        model.load_state_dict(data)

    # Update any metadata from files, if any
    if args.meta:
        for mfilename in args.meta:
            with open(mfilename) as f:
                meta.update(json.load(f))

    # Instrument it and prepare it for eval
    if not args.layers:
        # Skip wrappers with only one named modele
        container = model
        prefix = ''
        while len(list(container.named_children())) == 1:
            name, container = next(container.named_children())
            prefix += name + '.'
        # Default to all nontrivial top-level layers except last.
        args.layers = [
            prefix + name for name, module in container.named_children()
            if type(module).__module__ not in [
                # Skip ReLU and other activations.
                'torch.nn.modules.activation',
                # Skip pooling layers.
                'torch.nn.modules.pooling'
            ]
        ][:-1]
        print_progress('Defaulting to layers: %s' % ' '.join(args.layers))
    retain_layers(model, args.layers, args.add_scale_offset)
    if args.gan:
        ablate_layers(model, args.layers)
    model.eval()
    if args.cuda:
        model.cuda()

    # Set up the output directory, verify write access
    if args.outdir is None:
        args.outdir = os.path.join('dissect', type(model).__name__)
        print_progress('Writing output into %s.' % args.outdir)
    os.makedirs(args.outdir, exist_ok=True)
    train_dataset = None

    if not args.gan:
        # Load dataset for ordinary case.
        # Load perturbation
        perturbation = numpy.load(
            args.perturbation) if args.perturbation else None

        # Load broden dataset
        dataset = try_to_load_broden(args.segments, args.imgsize,
                                     args.broden_version, perturbation,
                                     args.download, args.size)
        if dataset is None:
            ds = try_to_load_multiseg(args.segments, args.imgsize,
                                      perturbation, args.size)
        if dataset is None:
            print_progress('No segmentation dataset found in %s' %
                           args.segements)
            print_progress('use --download to download Broden.')
            sys.exit(1)

        recovery = ReverseNormalize(IMAGE_MEAN, IMAGE_STDEV)
    else:
        # Examine first conv in model to determine input feature size.
        first_layer = [
            c for c in model.modules()
            if isinstance(c, (torch.nn.Conv2d, torch.nn.ConvTranspose2d,
                              torch.nn.Linear))
        ][0]
        # 4d input if convolutional, 2d input if first layer is linear.
        if isinstance(first_layer,
                      (torch.nn.Conv2d, torch.nn.ConvTranspose2d)):
            sample = standard_z_sample(args.size,
                                       first_layer.in_channels)[:, :, None,
                                                                None]
            train_sample = standard_z_sample(args.size,
                                             first_layer.in_channels,
                                             seed=2)[:, :, None, None]
        else:
            sample = standard_z_sample(args.size, first_layer.in_features)
            train_sample = standard_z_sample(args.size,
                                             first_layer.in_features,
                                             seed=2)
        dataset = TensorDataset(sample)
        train_dataset = TensorDataset(train_sample)
        recovery = autoimport_eval(args.gan)

    # Run dissect
    dissect(args.outdir,
            model,
            dataset,
            train_dataset=train_dataset,
            recover_image=recovery,
            examples_per_unit=args.examples,
            netname=args.netname,
            quantile_threshold=args.quantile_threshold,
            meta=meta,
            make_images=args.images or args.single_images,
            make_labels=args.labels,
            make_ablation=args.ablation,
            make_iqr=args.iqr,
            make_maxiou=args.maxiou,
            make_covariance=args.covariance,
            make_report=args.report,
            make_row_images=args.images,
            make_single_images=args.single_images,
            batch_size=args.batch_size,
            num_workers=args.num_workers,
            settings=vars(args))
def main():
    # Training settings
    def strpair(arg):
        p = tuple(arg.split(':'))
        if len(p) == 1:
            p = p + p
        return p

    def intpair(arg):
        p = arg.split(',')
        if len(p) == 1:
            p = p + p
        return tuple(int(v) for v in p)

    parser = argparse.ArgumentParser(
        description='Net dissect utility',
        prog='python -m netdissect',
        epilog=textwrap.dedent(help_epilog),
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('--model',
                        type=str,
                        default=None,
                        help='constructor for the model to test')
    parser.add_argument('--pthfile',
                        type=str,
                        default=None,
                        help='filename of .pth file for the model')
    parser.add_argument('--outdir',
                        type=str,
                        default='dissect',
                        help='directory for dissection output')
    parser.add_argument('--layers',
                        type=strpair,
                        nargs='+',
                        help='space-separated list of layer names to dissect' +
                        ', in the form layername[:reportedname]')
    parser.add_argument('--segments',
                        type=str,
                        default='dataset/broden',
                        help='directory containing segmentation dataset')
    parser.add_argument('--download',
                        action='store_true',
                        default=False,
                        help='downloads Broden dataset if needed')
    parser.add_argument('--imgsize',
                        type=intpair,
                        default=(227, 227),
                        help='input image size to use')
    parser.add_argument('--netname',
                        type=str,
                        default=None,
                        help='name for network in generated reports')
    parser.add_argument('--meta',
                        type=str,
                        nargs='+',
                        help='json files of metadata to add to report')
    parser.add_argument('--examples',
                        type=int,
                        default=20,
                        help='number of image examples per unit')
    parser.add_argument('--size',
                        type=int,
                        default=10000,
                        help='dataset subset size to use')
    parser.add_argument('--broden_version',
                        type=int,
                        default=1,
                        help='broden version number')
    parser.add_argument('--batch_size',
                        type=int,
                        default=100,
                        help='batch size for forward pass')
    parser.add_argument('--num_workers',
                        type=int,
                        default=24,
                        help='number of DataLoader workers')
    parser.add_argument('--no-labels',
                        action='store_true',
                        default=False,
                        help='disables labeling of units')
    parser.add_argument('--no-images',
                        action='store_true',
                        default=False,
                        help='disables generation of unit images')
    parser.add_argument('--single-images',
                        action='store_true',
                        default=False,
                        help='generates single images also')
    parser.add_argument('--no-report',
                        action='store_true',
                        default=False,
                        help='disables generation report summary')
    parser.add_argument('--no-cuda',
                        action='store_true',
                        default=False,
                        help='disables CUDA usage')
    parser.add_argument('--perturbation',
                        default=None,
                        help='filename of perturbation attack to apply')
    parser.add_argument('--add_scale_offset',
                        action='store_true',
                        default=None,
                        help='offsets masks according to stride and padding')
    parser.add_argument('--quiet',
                        action='store_true',
                        default=False,
                        help='silences console output')
    if len(sys.argv) == 1:
        parser.print_usage(sys.stderr)
        sys.exit(1)
    args = parser.parse_args()
    args.images = not args.no_images
    args.report = not args.no_report
    args.labels = not args.no_labels

    # Set up console output
    verbose_progress(not args.quiet)

    # Speed up pytorch
    torch.backends.cudnn.benchmark = True

    # Special case: download flag without model to test.
    if args.model is None and args.download:
        from netdissect.broden import ensure_broden_downloaded
        for resolution in [224, 227, 384]:
            ensure_broden_downloaded(args.segments, resolution,
                                     args.broden_version)
        sys.exit(0)

    # Help if broden is not present
    if not os.path.isdir(args.segments):
        print_progress('Segmentation dataset not found at %s.' % args.segments)
        print_progress('Specify dataset directory using --segments [DIR]')
        print_progress('To download Broden, run: netdissect --download')
        sys.exit(1)

    # Construct the network
    if args.model is None:
        print_progress('No model specified')
        sys.exit(1)

    # Set up CUDA
    args.cuda = not args.no_cuda and torch.cuda.is_available()
    if args.cuda:
        torch.backends.cudnn.benchmark = True

    model = eval_constructor(args.model)
    # Default add_scale_offset only for AlexNet-looking models.
    if args.add_scale_offset is None:
        args.add_scale_offset = ('Alex' in model.__class__.__name__)

    # Load its state dict
    meta = {}
    if args.pthfile is None:
        print_progress('Dissecting model without pth file.')
    else:
        data = torch.load(args.pthfile)
        if 'state_dict' in data:
            meta = {}
            for key in data:
                if isinstance(data[key], numbers.Number):
                    meta[key] = data[key]
            data = data['state_dict']
        model.load_state_dict(data)

    # Update any metadata from files, if any
    if args.meta:
        for mfilename in args.meta:
            with open(mfilename) as f:
                meta.update(json.load(f))

    # Instrument it and prepare it for eval
    if not args.layers:
        print_progress('No layers specified')
        sys.exit(1)
    retain_layers(model, args.layers, args.add_scale_offset)
    model.eval()
    if args.cuda:
        model.cuda()

    # Set up the output directory, verify write access
    if args.outdir is None:
        args.outdir = os.path.join('dissect', type(model).__name__)
        print_progress('Writing output into %s.' % args.outdir)
    os.makedirs(args.outdir, exist_ok=True)

    # Load perturbation
    perturbation = numpy.load(args.perturbation) if args.perturbation else None

    # Load broden dataset
    ds = try_to_load_broden(args.segments, args.imgsize, args.broden_version,
                            perturbation, args.download, args.size)
    if ds is None:
        ds = try_to_load_multiseg(args.segments, args.imgsize, perturbation,
                                  args.size)
    if ds is None:
        print_progress('No segmentation dataset found in %s' % args.segements)
        print_progress('use --download to download Broden.')
        sys.exit(1)

    # Run dissect
    dissect(args.outdir,
            model,
            ds,
            recover_image=ReverseNormalize(IMAGE_MEAN, IMAGE_STDEV),
            examples_per_unit=args.examples,
            netname=args.netname,
            meta=meta,
            make_images=args.images,
            make_labels=args.labels,
            make_report=args.report,
            make_single_images=args.single_images,
            batch_size=args.batch_size,
            num_workers=args.num_workers,
            settings=vars(args))