def main():

    parser = argparse.ArgumentParser()
    parser.add_argument('--input_dir', required=True, type=str, \
            help='Input directory with images.')
    parser.add_argument('--output_dir', required=True, type=str, \
            help='Output directory with images.')
    parser.add_argument('--max_epsilon', default=16.0, type=float, \
            help='Maximum size of adversarial perturbation.')
    parser.add_argument('--input_dir_mode', default='test', type=str, \
            help='Either flat or hierarchy, how the input dir is organised')
    parser.add_argument('--meta', default='/dev/shm/dev_dataset.csv', type=str, \
            help='True labels for dev set')
    parser.add_argument('--num_samples', default=-1, type=int, \
            help='Number of samples, -1 for all samples')
    parser.add_argument('--train_batch_size', default=24, type=int, \
            help='How many images process at one time.')
    args = parser.parse_args()

    input_dir_abs = os.path.abspath(args.input_dir)

    if args.input_dir_mode == 'flat':
        df_meta = pd.read_csv(args.meta)
        df_meta = df_meta[['ImageId', 'TrueLabel']]
        df_meta['ImageId'] = os.path.abspath(
            args.input_dir) + '/' + df_meta['ImageId'] + '.png'
        df_meta['TrueLabel'] = df_meta['TrueLabel'] - 1
        df_meta = df_meta.set_index('ImageId')
        meta_dict = df_meta.to_dict()['TrueLabel']
        images_info = image_generators.get_names_and_labels(\
                args.input_dir, mode='flat', meta_dict=meta_dict)
    elif args.input_dir_mode == 'flat_targeted':
        df_meta = pd.read_csv(args.meta)
        df_meta = df_meta[['ImageId', 'TrueLabel', 'TargetClass']]
        df_meta['ImageId'] = os.path.abspath(
            args.input_dir) + '/' + df_meta['ImageId'] + '.png'
        df_meta['TrueLabel'] = df_meta['TrueLabel'] - 1
        df_meta['TargetClass'] = df_meta['TargetClass'] - 1
        df_meta = df_meta.set_index('ImageId')
        meta_dict = df_meta.to_dict()['TrueLabel']
        meta_target_dict = df_meta.to_dict()['TargetClass']
        images_info = image_generators.get_names_and_labels(\
                args.input_dir, mode='flat_targeted', meta_dict=meta_dict, meta_target_dict=meta_target_dict)
    elif args.input_dir_mode == 'test':
        images_info = image_generators.get_names_and_labels(\
                args.input_dir, mode='test')
    elif args.input_dir_mode == 'test_targeted':
        df_meta = pd.read_csv(os.path.join(os.path.abspath(args.input_dir), 'target_class.csv'), \
                header=None, names=['ImageId', 'TargetClass'])
        df_meta['TargetClass'] = df_meta['TargetClass'] - 1
        df_meta = df_meta.set_index('ImageId')
        meta_target_dict = df_meta.to_dict()['TargetClass']
        images_info = image_generators.get_names_and_labels(\
                args.input_dir, mode='test_targeted', meta_target_dict=meta_target_dict)
    else:
        images_info = image_generators.get_names_and_labels(\
                args.input_dir, mode='hierarchy')
        image_generators.initialize_hierarchy(args.output_dir)

    print('Total images:', len(images_info))

    eps = args.max_epsilon

    if args.num_samples < 0:
        num_samples = len(images_info)
    else:
        num_samples = args.num_samples

    is_targeted = ('targeted' in args.input_dir_mode)
    print('Is targeted', is_targeted)
    use_avg_pred = True

    print('Loading images')
    # each image_data element: filename, img, lbl
    image_data = image_generators.load_images_into_batches(\
            images_info, args.train_batch_size, \
            max_samples=num_samples, to_buffer=True)
    print('Generating placeholders')
    placeholders = {}
    for i, data in enumerate(image_data):
        imgmat_size = (len(data[0]), 299, 299, 3)
        lblmat_size = (len(data[0]), 1000)
        data_min = task_utils.get_raw_array(init=np.clip(
            task_utils.get_array(data[1], imgmat_size) - eps, 0, None))
        data_max = task_utils.get_raw_array(init=np.clip(
            task_utils.get_array(data[1], imgmat_size) + eps, None, 255))
        data_actual = data[1]
        grad_mat = task_utils.get_raw_array(dims=imgmat_size)
        lbls_mat = task_utils.get_raw_array(
            dims=lblmat_size) if not is_targeted else data[3]
        # data, min, max, grad, lbl, pseudo-label matrix
        placeholders[i] = (\
                data[0], \
                data_actual, \
                data_min, \
                data_max, \
                grad_mat, \
                data[2], \
                lbls_mat)

    print('Preparing functions')

    source_models = [
        'incresv2ensadv', 'resnet50', 'inceptionv3adv', 'inceptionv3'
    ]
    pred_models = ['incresv2ensadv', 'resnet50', 'inceptionv3adv']

    step_size = 1.8
    noise_size = 0.
    grad_aug_scale = 0.
    plan = [\
            (('RST', 'ir2ea-i3a-i3-r50-t', ('incresv2ensadv', 'inceptionv3adv', 'inceptionv3', 'resnet50')), \
                (('incresv2ensadv', 1), ('inceptionv3adv', 1), ('inceptionv3', 1), step_size, noise_size), \
                (('incresv2ensadv', 1), ('inceptionv3adv', 1), ('inceptionv3', 1), step_size, noise_size), \
                (('incresv2ensadv', 1), ('inceptionv3adv', 1), ('inceptionv3', 1), step_size, noise_size), \
                (('incresv2ensadv', 1), step_size, noise_size), \
                (('incresv2ensadv', 1), ('inceptionv3adv', 1), ('inceptionv3', 1), step_size, noise_size), \
                (('incresv2ensadv', 1), ('inceptionv3adv', 1), step_size, noise_size), \
                (('incresv2ensadv', 1), step_size, noise_size), \
                (('incresv2ensadv', 1), ('inceptionv3adv', 1), step_size, noise_size), \
                (('incresv2ensadv', 1), ('inceptionv3adv', 1), step_size, noise_size), \
                (('incresv2ensadv', 1), ('inceptionv3adv', 1), step_size, noise_size), \
                (('incresv2ensadv', 1), ('inceptionv3adv', 1), step_size*0.5, noise_size), \
                (('incresv2ensadv', 1), step_size*0.5, noise_size), \
                (('incresv2ensadv', 1), step_size*0.5, noise_size), \
                (('incresv2ensadv', 1), ('inceptionv3', 1), step_size*0.5, noise_size), \
                (('incresv2ensadv', 1), ('inceptionv3adv', 1), step_size*0.5, noise_size), \
                (('incresv2ensadv', 1), step_size*0.5, noise_size), \
                (('incresv2ensadv', 1), ('inceptionv3adv', 1), ('inceptionv3', 1), step_size*0.25, noise_size), \
                (('incresv2ensadv', 1), ('inceptionv3adv', 1), ('inceptionv3', 1), step_size*0.25, noise_size), \
                (('incresv2ensadv', 1), step_size*0.25, noise_size), \
                (('incresv2ensadv', 1), step_size*0.25, noise_size), \
                (('incresv2ensadv', 1), step_size*0.25, noise_size), \
                (('incresv2ensadv', 1), ('inceptionv3adv', 1), ('inceptionv3', 1), step_size*0.25, noise_size), \
                (('incresv2ensadv', 1), ('inceptionv3adv', 1), ('inceptionv3', 1), step_size*0.25, noise_size), \
                (('incresv2ensadv', 1), step_size*0.1, noise_size), \
                (('incresv2ensadv', 1), ('inceptionv3', 1), step_size*0.1, noise_size), \
                (('incresv2ensadv', 1), step_size*0.1, noise_size), \
                (('incresv2ensadv', 1), step_size*0.1, noise_size), \
                (('incresv2ensadv', 1), ('inceptionv3', 1), step_size*0.1, noise_size), \
                (('incresv2ensadv', 1), ('inceptionv3', 1), step_size*0.1, noise_size), \
                (('incresv2ensadv', 1), step_size*0.1, noise_size), \
                (('incresv2ensadv', 1), ('inceptionv3', 1), step_size*0.1, noise_size), \
                (('incresv2ensadv', 1), ('inceptionv3adv', 1), step_size*0.1, noise_size), \
                (('incresv2ensadv', 1), step_size*0.1, noise_size), \
                (('incresv2ensadv', 1), ('inceptionv3', 1), step_size*0.1, noise_size), \
                (('incresv2ensadv', 1), ('inceptionv3', 1), step_size*0.1, noise_size), \
                (('incresv2ensadv', 1), ('inceptionv3adv', 1), step_size*0.1, noise_size), \
                (('incresv2ensadv', 1), ('inceptionv3adv', 1), step_size*0.1, noise_size), \
                (('incresv2ensadv', 1), step_size*0.1, noise_size), \
                (('incresv2ensadv', 1), step_size*0.1, noise_size), \
                (('incresv2ensadv', 1), ('inceptionv3', 1), step_size*0.1, noise_size), \
                (('incresv2ensadv', 1), ('inceptionv3adv', 1), step_size*0.1, noise_size), \
                (('incresv2ensadv', 1), ('inceptionv3', 1), step_size*0.1, noise_size), \
                (('incresv2ensadv', 1), ('inceptionv3', 1), step_size*0.1, noise_size)), \
            ]

    if not task_utils.verify_plan(source_models, pred_models, plan):
        return

    task_list = []
    phase_info = {}
    next_task = {}

    for i_phase, phase in enumerate(plan):
        last_task_in_batch = {}
        if phase[0][0] == 'RST':
            task = ('phase {0} restore from {1} model {2}'.format(i_phase, phase[0][1], phase[0][2]), \
                    (), 7, (phase[0][1],))
            task_list.append(task)
            phase_info[i_phase] = (len(task_list) - 1, set(), set())
            # COPIED
            if (not is_targeted) and (i_phase == 0):
                for phid, (fname_list, imgs, imgs_min, imgs_max, grads, lbls,
                           lbls_mat) in placeholders.items():
                    task = ('batch {0}, fname [{1}], pred'.format(phid, ' '.join([os.path.split(f)[1][:-4] for f in fname_list])), \
                            (fname_list,imgs,None,grads,imgs_min,imgs_max,lbls_mat),3,(None,))
                    task_list.append(task)
                    last_task_in_batch[phid] = len(task_list) - 1
                    phase_info[i_phase][1].add(len(task_list) - 1)
                    phase_info[i_phase][2].add(len(task_list) - 1)
        else:
            if i_phase:
                task = ('phase {0} reload {1}'.format(i_phase, phase[0]), (), 5, \
                        (phase[0][0], use_avg_pred, is_targeted))
                task_list.append(task)
                # id of loading task, all tasks other than load, first tasks after load
                phase_info[i_phase] = (len(task_list) - 1, set(), set())
            else:
                task = ('phase {0} initial load {1} pred {2}'.format(i_phase, phase[0][0], phase[0][1]), (), 6, \
                        (phase[0][0], phase[0][1], use_avg_pred, is_targeted))
                task_list.append(task)
                phase_info[0] = (len(task_list) - 1, set(), set())
                if not is_targeted:
                    for phid, (fname_list, imgs, imgs_min, imgs_max, grads,
                               lbls, lbls_mat) in placeholders.items():
                        task = ('batch {0}, fname [{1}], pred'.format(phid, ' '.join([os.path.split(f)[1][:-4] for f in fname_list])), \
                                (fname_list,imgs,None,grads,imgs_min,imgs_max,lbls_mat),3,(None,))
                        task_list.append(task)
                        last_task_in_batch[phid] = len(task_list) - 1
                        phase_info[i_phase][1].add(len(task_list) - 1)
                        phase_info[i_phase][2].add(len(task_list) - 1)
        for phid, (fname_list, imgs, imgs_min, imgs_max, grads, lbls,
                   lbls_mat) in placeholders.items():
            for i_pl, pl in enumerate(phase[1:]):
                if pl[-1] > 1e-5:
                    task = ('phase {0} batch {1} step {2}, noise {3:.2f}'.format(i_phase, phid, i_pl, pl[-1]), \
                            (fname_list,imgs,None,grads,imgs_min,imgs_max,lbls_mat),4,\
                            (pl[-1],))
                    task_list.append(task)

                    # COPIED
                    if phid not in last_task_in_batch:
                        phase_info[i_phase][2].add(len(task_list) - 1)
                    else:
                        next_task[
                            last_task_in_batch[phid]] = len(task_list) - 1
                    last_task_in_batch[phid] = len(task_list) - 1
                    phase_info[i_phase][1].add(len(task_list) - 1)

                for j_stp, stp in enumerate(pl[:-2]):
                    task = ('phase {0} batch {1} step {2}, grad {3} repeat {4}'.format(i_phase, phid, i_pl, stp[0], stp[1]), \
                            (fname_list,imgs,None,grads,imgs_min,imgs_max,lbls_mat),0,\
                            (None, stp[0], grad_aug_scale, stp[1]))
                    task_list.append(task)

                    # COPIED
                    if phid not in last_task_in_batch:
                        phase_info[i_phase][2].add(len(task_list) - 1)
                    else:
                        next_task[
                            last_task_in_batch[phid]] = len(task_list) - 1
                    last_task_in_batch[phid] = len(task_list) - 1
                    phase_info[i_phase][1].add(len(task_list) - 1)

                task = ('phase {0} batch {1} step {2}, FGSM step, step size {3:.2f}'.format(i_phase, phid, i_pl, pl[-2]), \
                        (fname_list,imgs,None,grads,imgs_min,imgs_max,lbls_mat),1,\
                        (pl[-2],))
                task_list.append(task)
                # COPIED
                if phid not in last_task_in_batch:
                    phase_info[i_phase][2].add(len(task_list) - 1)
                else:
                    next_task[last_task_in_batch[phid]] = len(task_list) - 1
                last_task_in_batch[phid] = len(task_list) - 1
                phase_info[i_phase][1].add(len(task_list) - 1)

            task = ('phase {0} batch {1}, finish'.format(i_phase, phid), \
                    (fname_list,imgs,None,grads,imgs_min,imgs_max,lbls_mat),2,(len(input_dir_abs), args.output_dir))
            task_list.append(task)
            # COPIED
            if phid not in last_task_in_batch:
                phase_info[i_phase][2].add(len(task_list) - 1)
            else:
                next_task[last_task_in_batch[phid]] = len(task_list) - 1
            last_task_in_batch[phid] = len(task_list) - 1
            phase_info[i_phase][1].add(len(task_list) - 1)

    print('Preparation finished', time.time() - start_time)
    print(len(task_list), 'tasks')

    task_utils.run_tasks(task_list,
                         phase_info,
                         next_task,
                         verbose=False,
                         cpu_worker=1)

    print('Time:', time.time() - start_time)
    print('Total images:', len(images_info))
    print('Augmentation:', grad_aug_scale)
    print('\n'.join(str(u) for u in plan))
    print()
Ejemplo n.º 2
0
def main():

    parser = argparse.ArgumentParser()
    parser.add_argument('--input_dir', required=True, type=str, \
            help='Input directory with images.')
    parser.add_argument('--output_file', required=True, type=str, \
            help='Output directory with images.')
    parser.add_argument('--input_dir_mode', default='test', type=str, \
            help='Either flat or hierarchy, how the input dir is organised')
    parser.add_argument('--train_batch_size', default=24, type=int, \
            help='How many images process at one time.')
    args = parser.parse_args()

    input_dir_abs = os.path.abspath(args.input_dir)

    if args.input_dir_mode == 'flat':
        df_meta = pd.read_csv(args.meta)
        df_meta = df_meta[['ImageId', 'TrueLabel']]
        df_meta['ImageId'] = os.path.abspath(
            args.input_dir) + '/' + df_meta['ImageId'] + '.png'
        df_meta['TrueLabel'] = df_meta['TrueLabel'] - 1
        df_meta = df_meta.set_index('ImageId')
        meta_dict = df_meta.to_dict()['TrueLabel']
        images_info = image_generators.get_names_and_labels(\
                args.input_dir, mode='flat', meta_dict=meta_dict)
    elif args.input_dir_mode == 'flat_targeted':
        df_meta = pd.read_csv(args.meta)
        df_meta = df_meta[['ImageId', 'TrueLabel', 'TargetClass']]
        df_meta['ImageId'] = os.path.abspath(
            args.input_dir) + '/' + df_meta['ImageId'] + '.png'
        df_meta['TrueLabel'] = df_meta['TrueLabel'] - 1
        df_meta['TargetClass'] = df_meta['TargetClass'] - 1
        df_meta = df_meta.set_index('ImageId')
        meta_dict = df_meta.to_dict()['TrueLabel']
        meta_target_dict = df_meta.to_dict()['TargetClass']
        images_info = image_generators.get_names_and_labels(\
                args.input_dir, mode='flat_targeted', meta_dict=meta_dict, meta_target_dict=meta_target_dict)
    elif args.input_dir_mode == 'test':
        images_info = image_generators.get_names_and_labels(\
                args.input_dir, mode='test')
    elif args.input_dir_mode == 'test_targeted':
        df_meta = pd.read_csv(os.path.join(obs.path.abspath(args.input_dir), 'target_class.csv'), \
                header=None, names=['ImageId', 'TargetClass'])
        df_meta = df_meta.set_index('ImageId')
        meta_target_dict = df_meta.to_dict()['TargetClass']
        images_info = image_generators.get_names_and_labels(\
                args.input_dir, mode='test_targeted', meta_target_dict=meta_target_dict)
    else:
        images_info = image_generators.get_names_and_labels(\
                args.input_dir, mode='hierarchy')
        image_generators.initialize_hierarchy(args.output_dir)

    print('Total images:', len(images_info))

    eps = 8.

    num_samples = len(images_info)

    use_avg_pred = True
    step_size = eps
    noise_size = eps * 0.2
    grad_aug_scale = 0.02

    print('Loading images')
    # each image_data element: filename, img, lbl
    image_data = image_generators.load_images_into_batches(\
            images_info, args.train_batch_size, args.input_dir, \
            max_samples=num_samples, to_buffer=True)
    print('Generating placeholders')
    placeholders = {}
    for i, data in enumerate(image_data):
        imgmat_size = (len(data[0]), 299, 299, 3)
        lblmat_size = (len(data[0]), 1000)
        data_min = get_raw_array(
            init=np.clip(get_array(data[1], imgmat_size) - eps, 0, None))
        data_max = get_raw_array(
            init=np.clip(get_array(data[1], imgmat_size) + eps, None, 255))
        data_actual = data[1]
        grad_mat = get_raw_array(dims=imgmat_size)
        lbl_mat = get_raw_array(dims=lblmat_size)
        # data, min, max, grad, lbl, pseudo-label matrix
        placeholders[i] = (\
                data[0], \
                data_actual, \
                data_min, \
                data_max, \
                grad_mat, \
                data[2], \
                lbl_mat)
    print('Preparing functions')

    # each tuple is a sequence of pairs followed by two floats
    # the pairs describe which network to use and the rand_repeat for accumulating gradients
    # the two floats are the step size and noise size of that FGSM step

    source_models = [
        'incresv2ensadv', 'inceptionv3adv', 'inceptionv3ens3adv',
        'inceptionv3ens4adv'
    ]
    dist_pairs = [('inceptionv3ens3adv', 'inceptionv3ens4adv'), \
            ('inceptionv3ens3adv', 'incresv2ensadv'), \
            ('inceptionv3ens4adv', 'incresv2ensadv'), \
            ('inceptionv3adv', 'incresv2ensadv'), \
            ('inceptionv3ens3adv', 'inceptionv3adv'), \
            ('inceptionv3ens4adv', 'inceptionv3adv'), \
            ]

    step_size = 1.
    noise_size = 0.4
    grad_aug_scale = 0.01
    pred_aug_scale = 0.01

    plan = [\
            (('RST', 'def'), \
            #((source_models, dist_pairs), \
                ((('inceptionv3ens3adv','inceptionv3ens4adv'), 1), step_size, noise_size), \
                ((('inceptionv3ens3adv','inceptionv3ens4adv'), 1), step_size, noise_size), \
                ((('inceptionv3ens4adv','incresv2ensadv'), 1), (('inceptionv3ens3adv','incresv2ensadv'), 1), step_size, noise_size), \
                ((('inceptionv3ens3adv','inceptionv3ens4adv'), 1), step_size, noise_size), \
                ((('inceptionv3ens4adv','incresv2ensadv'), 1), (('inceptionv3ens3adv','incresv2ensadv'), 1), step_size, noise_size), \
                ((('inceptionv3ens3adv','inceptionv3ens4adv'), 1), step_size, noise_size), \
                ((('inceptionv3ens4adv','inceptionv3adv'), 1), (('inceptionv3ens3adv','incresv2ensadv'), 1), step_size, noise_size)), \
            ]

    task_list = []
    phase_info = {}
    next_task = {}

    for i_phase, phase in enumerate(plan):
        last_task_in_batch = {}
        if phase[0][0] == 'RST':
            task = ('phase {0} restore from {1}'.format(i_phase, phase[0][1]), \
                    (), 7, (phase[0][1],))
            task_list.append(task)
            phase_info[i_phase] = (len(task_list) - 1, set(), set())
        else:
            task = ('phase {0} load {1}'.format(i_phase, phase[0]), (), 5+1*(i_phase==0), \
                    (phase[0][0], phase[0][1]))
            task_list.append(task)
            # id of loading task, all tasks other than load, first tasks after load
            phase_info[i_phase] = (len(task_list) - 1, set(), set())
        for phid, (fname_list, imgs, imgs_min, imgs_max, grads, lbls,
                   lbls_mat) in placeholders.items():
            for i_pl, pl in enumerate(phase[1:]):
                if pl[-1] > 1e-5:
                    task = ('phase {0} batch {1} step {2}, noise {3:.2f}'.format(i_phase, phid, i_pl, pl[-1]), \
                            (fname_list,imgs,None,grads,imgs_min,imgs_max,lbls_mat),4,\
                            (pl[-1],))
                    task_list.append(task)

                    # COPIED
                    if phid not in last_task_in_batch:
                        phase_info[i_phase][2].add(len(task_list) - 1)
                    else:
                        next_task[
                            last_task_in_batch[phid]] = len(task_list) - 1
                    last_task_in_batch[phid] = len(task_list) - 1
                    phase_info[i_phase][1].add(len(task_list) - 1)

                for j_stp, stp in enumerate(pl[:-2]):
                    task = ('phase {0} batch {1} step {2}, grad {3} repeat {4}'.format(i_phase, phid, i_pl, stp[0], stp[1]), \
                            (fname_list,imgs,None,grads,imgs_min,imgs_max,lbls_mat),0,\
                            (None, stp[0], grad_aug_scale, stp[1]))
                    task_list.append(task)

                    # COPIED
                    if phid not in last_task_in_batch:
                        phase_info[i_phase][2].add(len(task_list) - 1)
                    else:
                        next_task[
                            last_task_in_batch[phid]] = len(task_list) - 1
                    last_task_in_batch[phid] = len(task_list) - 1
                    phase_info[i_phase][1].add(len(task_list) - 1)

                task = ('phase {0} batch {1} step {2}, FGSM step, step size {3:.2f}'.format(i_phase, phid, i_pl, pl[-2]), \
                        (fname_list,imgs,None,grads,imgs_min,imgs_max,lbls_mat),1,\
                        (pl[-2],))
                task_list.append(task)
                # COPIED
                if phid not in last_task_in_batch:
                    phase_info[i_phase][2].add(len(task_list) - 1)
                else:
                    next_task[last_task_in_batch[phid]] = len(task_list) - 1
                last_task_in_batch[phid] = len(task_list) - 1
                phase_info[i_phase][1].add(len(task_list) - 1)

            task = ('phase {0} batch {1}, final pred'.format(i_phase, phid), \
                    (fname_list,imgs,None,grads,imgs_min,imgs_max,lbls_mat),3,(pred_aug_scale,))
            task_list.append(task)
            # COPIED
            if phid not in last_task_in_batch:
                phase_info[i_phase][2].add(len(task_list) - 1)
            else:
                next_task[last_task_in_batch[phid]] = len(task_list) - 1
            last_task_in_batch[phid] = len(task_list) - 1
            phase_info[i_phase][1].add(len(task_list) - 1)

    print('Preparation finished', time.time() - start_time)
    print(len(task_list), 'tasks')
    run_tasks(task_list, phase_info, next_task, verbose=False, cpu_worker=1)

    with open(args.output_file, 'w') as f:
        for phid, (fnames, _, _, _, _, _, lbls_mat) in placeholders.items():
            batch_preds = get_array(lbls_mat,
                                    (len(fnames), 1000)).argmax(axis=1)
            for img, pred in zip(fnames, batch_preds):
                _ = f.write('{0},{1}\n'.format(
                    os.path.split(img)[1], pred + 1))

    print('Time:', time.time() - start_time)
    print('\n'.join(str(u) for u in plan))
    print()