'--depth',
                        help='ResNet depth',
                        type=int,
                        default=50,
                        choices=[50, 101, 152])
    parser.add_argument('--arch',
                        help='Name of architectures defined in nets.py',
                        default='ResNet')
    args = parser.parse_args()

    # Define model
    model = getattr(nets, args.arch + 'Model')(args)

    # Define attacker
    if args.attack_iter == 0 or args.eval_directory:
        attacker = NoOpAttacker()
    else:
        attacker = PGDAttacker(
            args.attack_iter,
            args.attack_epsilon,
            args.attack_step_size,
            prob_start_from_clean=0.2 if not args.eval else 0.0)
        if args.use_fp16xla:
            attacker.USE_FP16 = True
            attacker.USE_XLA = True  #False if args.arch.endswith("Dither") else True
    model.set_attacker(attacker)

    os.system("nvidia-smi")
    hvd.init()
    gpu_thread_count = 2
    os.environ['TF_GPU_THREAD_MODE'] = 'gpu_private'
def do_train(model, image_size=224, buffer_size=2000):
    """Train `model` on ImageNet with Horovod, with adversarial evaluation.

    Args:
        model: model object exposing ``get_input_signature``, ``build_graph``,
            ``get_optimizer`` and ``get_inference_func`` (defined in nets.py).
        image_size: side length (pixels) of the square input images.
        buffer_size: ZMQ high-water mark for the remote training dataflow.

    Relies on the module-level ``args`` (parsed CLI options) and on ``hvd``
    having been initialized by the caller.
    """
    batch = args.batch
    # Effective global batch size across all Horovod workers.
    total_batch = batch * hvd.size()

    if args.fake:
        # Synthetic data for benchmarking -- no real input pipeline needed.
        data = FakeData([[batch, image_size, image_size, 3], [batch]],
                        1000,
                        random=False,
                        dtype=['uint8', 'int32'])
        data = StagingInput(QueueInput(data))
        steps_per_epoch = 50
    else:
        logger.info("#Tower: {}; Batch size per tower: {}".format(
            hvd.size(), batch))
        zmq_addr = 'ipc://@imagenet-train-b{}-p{}'.format(batch, args.port)
        if args.no_zmq_ops:
            dataflow = RemoteDataZMQ(zmq_addr, hwm=buffer_size, bind=False)
            data = QueueInput(dataflow)
        else:
            data = ZMQInput(zmq_addr, 30, bind=False)
        data = StagingInput(data)

        # 1281167 = number of images in the ImageNet training set.
        steps_per_epoch = int(np.round(1281167 / total_batch))

    # "ImageNet in 1 Hour", Sec 2.1 -- Linear Scaling Rule: when the
    # minibatch size is multiplied by k, multiply the learning rate by k.
    BASE_LR = 0.1 * (total_batch // 256)
    logger.info("Base LR: {}".format(BASE_LR))
    # "Feature Denoising", Sec 5: models are trained for a total of 110
    # epochs; the learning rate is decreased 10x at epochs 35, 70 and 95.
    callbacks = [
        ModelSaver(max_to_keep=10),
        EstimatedTimeLeft(),
        ScheduledHyperParamSetter('learning_rate', [(0, BASE_LR),
                                                    (35, BASE_LR * 1e-1),
                                                    (70, BASE_LR * 1e-2),
                                                    (95, BASE_LR * 1e-3)])
    ]
    max_epoch = 110

    if BASE_LR > 0.1:
        # "ImageNet in 1 Hour", Sec 2.2: start from a learning rate of eta
        # and increase it by a constant amount each iteration so that it
        # reaches k*eta after 5 epochs (linear warmup, step-based schedule).
        callbacks.append(
            ScheduledHyperParamSetter('learning_rate',
                                      [(0, 0.1),
                                       (5 * steps_per_epoch, BASE_LR)],
                                      interp='linear',
                                      step_based=True))

    if not args.fake:
        # add distributed evaluation, for various attackers that we care.
        def add_eval_callback(name, attacker, condition):
            cb = create_eval_callback(
                name,
                model.get_inference_func(attacker),
                # always eval in the last 2 epochs no matter what
                lambda epoch_num: condition(epoch_num) or epoch_num > max_epoch
                - 2,
                image_size=image_size)
            callbacks.append(cb)

        add_eval_callback('eval-clean', NoOpAttacker(), lambda e: True)
        add_eval_callback(
            'eval-10step',
            PGDAttacker(10, args.attack_epsilon, args.attack_step_size),
            lambda e: True)
        add_eval_callback(
            'eval-50step',
            PGDAttacker(50, args.attack_epsilon, args.attack_step_size),
            lambda e: e % 20 == 0)
        add_eval_callback(
            'eval-100step',
            PGDAttacker(100, args.attack_epsilon, args.attack_step_size),
            lambda e: e % 10 == 0 or e > max_epoch - 5)
        # These are registered but never triggered by the epoch schedule
        # (condition is always False); they only run in the last 2 epochs
        # via the override inside add_eval_callback.
        for k in [20, 30, 40, 60, 70, 80, 90]:
            add_eval_callback(
                'eval-{}step'.format(k),
                PGDAttacker(k, args.attack_epsilon, args.attack_step_size),
                lambda e: False)

    trainer = HorovodTrainer(average=True)
    trainer.setup_graph(model.get_input_signature(), data, model.build_graph,
                        model.get_optimizer)
    trainer.train_with_defaults(callbacks=callbacks,
                                steps_per_epoch=steps_per_epoch,
                                session_init=SmartInit(args.load),
                                max_epoch=max_epoch,
                                starting_epoch=args.starting_epoch)
# --- Example #3 (scraped fragment separator; original page index "0") ---
                        '--depth',
                        help='ResNet depth',
                        type=int,
                        default=50,
                        choices=[50, 101, 152])
    parser.add_argument('--arch',
                        help='Name of architectures defined in nets.py',
                        default='ResNet')
    args = parser.parse_args()

    # Define model
    model = getattr(nets, args.arch + 'Model')(args)

    # Define attacker
    if args.attack_iter == 0 or args.eval_directory:
        attacker = NoOpAttacker()
    else:
        attacker = PGDAttacker(
            args.attack_iter,
            args.attack_epsilon,
            args.attack_step_size,
            prob_start_from_clean=0.2 if not args.eval else 0.0)
        if args.use_fp16xla:
            attacker.USE_FP16 = True
            attacker.USE_XLA = True
    model.set_attacker(attacker)

    os.system("nvidia-smi")
    hvd.init()

    if args.eval: