def nfe(args):
    run = Experiment.from_dir(args.run, main='model')
    print(run)
    results_file = run.path_to('nfe.csv')
    best_ckpt_file = run.ckpt('best')

    # check if results exist and are up to date; if so, skip the computation
    if (os.path.exists(results_file)
            and os.path.getctime(results_file) >= os.path.getctime(best_ckpt_file)
            and not args.force):
        print('Skipping...')
        return

    test_data = load_test_data(run)
    test_loader = DataLoader(test_data, batch_size=1, shuffle=False)

    model = load_model(run)
    model = model.to(args.device)
    model.eval()
    model.odeblock.tol = args.tol

    def process(datum):
        x, y = datum
        x = x.to(args.device)
        p = model(x)
        nfe = model.nfe(reset=True)
        pred = p.argmax(dim=1).item()
        y = y.item()
        return {'y_true': y, 'y_pred': pred, 'nfe': nfe}

    data = [process(d) for d in tqdm(test_loader)]
    pd.DataFrame(data).to_csv(results_file)
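
# Note: helpers such as Experiment, load_model, and load_test_data come from
# this repository's own modules and are not shown here. The model.nfe(reset=True)
# call is assumed to read and clear a function-evaluation counter kept by the
# ODE block; a minimal, hypothetical sketch of such a block, assuming
# torchdiffeq's odeint:

import torch
import torch.nn as nn
from torchdiffeq import odeint


class CountingODEBlock(nn.Module):
    """Toy ODE block that counts function evaluations (illustrative only)."""

    def __init__(self, dynamics, t1=1.0, tol=1e-3):
        super().__init__()
        self.dynamics = dynamics
        self.t1 = t1
        self.tol = tol
        self._nfe = 0

    def _func(self, t, x):
        self._nfe += 1  # one call to the dynamics = one function evaluation
        return self.dynamics(x)

    def forward(self, x):
        t = torch.tensor([0.0, float(self.t1)])
        out = odeint(self._func, x, t, rtol=self.tol, atol=self.tol)
        return out[-1]  # state at t1

    def nfe(self, reset=False):
        n = self._nfe
        if reset:
            self._nfe = 0
        return n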
# Example 2
def nparams(args):
    assert Experiment.is_exp_dir(args.run), f"Not a run dir: {args.run}"
    run = Experiment.from_dir(args.run, main='model')
    model = load_model(run)
    print(run)

    nparams = sum(p.numel() for p in tqdm(model.parameters()) if p.requires_grad)
    print(f'N. Params: {nparams / 10 ** 6:.2g}M ({nparams})')
def finetune(args):
    run = Experiment.from_dir(args.run, main='model')
    print(run)

    features_file = 'features.h5' if args.data is None else f'features-{args.data}.h5'
    features_file = run.path_to(features_file)

    assert os.path.exists(features_file), f"Features file not found: {features_file}"

    results = pd.DataFrame()

    results_file = run.path_to('finetune.csv')
    if (os.path.exists(results_file)
            and os.path.getctime(results_file) >= os.path.getctime(features_file)
            and not args.force):
        results = pd.read_csv(results_file)

    params = next(run.params.itertuples())

    with h5py.File(features_file, 'r') as f:
        features = f['features'][...]
        y_true = f['y_true'][...]
        t1s = f['t1s'][...]

    block = np.zeros_like(t1s, dtype=int)
    if params.downsample == "ode":
        block = np.concatenate((block, block + 1))
        t1s = np.concatenate((t1s, t1s))

    svm = LinearSVC()
    Cs = np.logspace(-2, 2, 5)
    svm = GridSearchCV(svm, {'C': Cs},
                       scoring='accuracy',
                       n_jobs=-1,
                       verbose=10,
                       cv=5)

    for t1, b, fi in tqdm(zip(t1s, block, features)):
        if ('t1' in results.columns and 'block' in results.columns
                and ((results.t1 == t1) & (results.block == b)).any()):
            print(f'Skipping b={b} t1={t1} ...')
            continue

        score = svm.fit(fi, y_true).best_score_
        print(f'Accuracy: {score:.2%}')
        results = results.append(
            {'block': b, 't1': t1, 'cv_accuracy': score}, ignore_index=True)
        results.to_csv(results_file, index=False)
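
# finetune assumes the HDF5 file stacks one feature matrix per integration
# time, so zip(t1s, block, features) yields one (n_samples, dim) block per t1.
# A self-contained sketch of the per-block cross-validated SVM fit, using
# synthetic data in place of the real features:

import numpy as np
from sklearn.svm import LinearSVC
from sklearn.model_selection import GridSearchCV

rng = np.random.default_rng(0)
fi = rng.normal(size=(100, 16))        # one feature block: (n_samples, dim)
y_true = rng.integers(0, 2, size=100)  # toy binary labels

svm = GridSearchCV(LinearSVC(), {'C': np.logspace(-2, 2, 5)},
                   scoring='accuracy', cv=5, n_jobs=-1)
score = svm.fit(fi, y_true).best_score_  # mean CV accuracy of the best C
print(f'Accuracy: {score:.2%}')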
# Example 4
def nfe(args):
    run = Experiment.from_dir(args.run, main='model')
    print(run)
    results_file = run.path_to('nfe.csv.gz')
    best_ckpt_file = run.ckpt('best')

    results = pd.DataFrame()
    # check if results exist and are up to date; if so, load them
    # (already-computed settings are skipped later)
    if (os.path.exists(results_file)
            and os.path.getctime(results_file) >= os.path.getctime(best_ckpt_file)
            and not args.force):
        results = pd.read_csv(results_file,
                              float_precision='round_trip').round({'t1': 2})

    test_data = load_test_data(run)
    test_loader = DataLoader(test_data, batch_size=1, shuffle=False)

    model = load_model(run)
    model = model.to(args.device)
    model.eval()

    def _nfe(test_loader, model, t1, tol, args):
        model.odeblock.t1 = t1
        model.odeblock.tol = tol

        y_true = []
        y_pred = []
        nfes = []

        for x, y in tqdm(test_loader):
            y_true.append(y.item())
            y_pred.append(model(x.to(args.device)).argmax(dim=1).item())
            nfes.append(model.nfe(reset=True))

        return {'y_true': y_true, 'y_pred': y_pred, 'nfe': nfes}

    progress = tqdm(itertools.product(args.tol, args.t1))
    for tol, t1 in progress:
        if ('t1' in results.columns and 'tol' in results.columns
                and ((results.t1 == t1) & (results.tol == tol)).any()):
            print(f'Skipping tol={tol} t1={t1} ...')
            continue

        progress.set_postfix({'tol': tol, 't1': t1})
        result = _nfe(test_loader, model, t1, tol, args)
        result = pd.DataFrame(result)
        result['t1'] = t1
        result['tol'] = tol
        results = results.append(result, ignore_index=True)
        results.to_csv(results_file, index=False)
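
# The ctime-based freshness check above recurs in nearly every command; it
# could be factored into a small helper like this (a sketch, not part of the
# original code):

import os


def is_up_to_date(target, dependency):
    """True if target exists and is at least as recent as dependency."""
    return (os.path.exists(target)
            and os.path.getctime(target) >= os.path.getctime(dependency))

# usage: if is_up_to_date(results_file, best_ckpt_file) and not args.force: ...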
# Example 5
def retrieval(args):
    assert Experiment.is_exp_dir(args.run), f"Not a run dir: {args.run}"
    run = Experiment.from_dir(args.run, main='model')
    results_file = run.path_to('retrieval.csv')

    assert os.path.exists(results_file), f"Results file not found: {results_file}"

    results = pd.read_csv(results_file)
    # t1s, mean_aps_sym, mean_aps_asym = results.loc[:, ['t1', 'mean_ap_sym', 'mean_ap_asym']]

    plt.figure(figsize=(15,5))
    ax = plt.gca()
    results.plot('t1', 'mean_ap_sym', marker='.', label='sym', ax=ax)
    results.plot('t1', 'mean_ap_asym', marker='.', label='asym', ax=ax)
    # plt.axhline(mean_ap_res, c='k', label='resnet')
    max_diff = (results.mean_ap_asym - results.mean_ap_sym).max()
    print(f'Asym-sym max difference: {max_diff:%}')
    # plt.plot(t1s, mean_aps_asym - mean_aps_sym, marker='.', label='diff')

    plt.title('mAP vs Feature Depth (t) - CIFAR-10')
    plt.xlabel('Integration time')
    plt.ylabel('mAP')
    # plt.ylim([0, 1])
    plt.legend(loc='best')

    ''' NFE sectioning
    ns = np.diff(nfe)
    xv = np.diff(t1s)/2
    xv[1:] += t1s[1:-1]
    xv = xv[ns != 0]

    for x in xv:
        plt.axvline(x, c='k', ls='--')

    xv = np.array([0, ] + xv.tolist() + [1,])
    xl = np.diff(xv) / 2
    xl[1:] += xv[1:-1]

    for x, n in zip(xl, np.unique(nfe)):
        plt.annotate('NFE = {:.1f}'.format(n), (x, .2), rotation=90, ha='center', va='center')
    '''

    plt.savefig(args.output, bbox_inches="tight")
# Example 6
def nfe(args):
    assert Experiment.is_exp_dir(args.run), f"Not a run dir: {args.run}"
    exp = Experiment.from_dir(args.run, main='model')
    nfe = pd.read_csv(exp.path_to('nfe.csv.gz'))  #, index_col=0)
    nfe = nfe[(nfe.t1 == 1) & (nfe.tol == 0.001)].reset_index(drop=True)
    nfe.nfe = (nfe.nfe - 2) / 6  # convert NFE to solver steps (assuming 6 evals per step + 2 fixed)

    nfe = nfe[nfe.y_true == nfe.y_pred]

    dataset = exp.params.dataset.iloc[0]
    if dataset == 'mnist':
        labels = [str(i) for i in range(10)]
    elif dataset == 'cifar10':
        labels = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
    else:
        raise ValueError(f'Unsupported dataset: {dataset}')

    nfe.y_true = nfe.y_true.apply(lambda x: labels[x])
    nfe = nfe.sort_values('y_true')
    # g = sns.FacetGrid(nfe, col='y_true')
    # g.map(sns.kdeplot, 'nfe')
    # ax = sns.boxplot(y='y_true', x='nfe', data=nfe, orient='h')
    # ax.set_xlabel('solver steps')
    # ax.set_ylabel('image class')
    
    print(r'{:.2g} \pm {:.2g}'.format(nfe.nfe.mean(), nfe.nfe.std()))

    nfe_min, nfe_max = nfe.nfe.min(), nfe.nfe.max()
    values = np.arange(nfe_min, nfe_max + 1)
    plt.xticks(values)
    bins = values - .5
    plt.xlim(bins[0], bins[-1])
    counts, _, _ = plt.hist(nfe.nfe, bins=bins)
    plt.grid(b=False, axis='x')
    plt.grid(b=True, which='minor', linewidth=0.5, linestyle='--', axis='y')
    plt.gca().get_yaxis().set_minor_locator(matplotlib.ticker.AutoMinorLocator())

    for v, c in zip(values, counts):
        plt.text(v, c, f'{c:g}', ha='center', va='bottom')

    plt.savefig(args.output, bbox_inches="tight")
    plt.close()

    """ Images """
    sns.set_style('white')

    n = 5
    pad = 20
    side = 28 if dataset == 'mnist' else 32
    side += 2  # make_grid padding
    groups = nfe.groupby('y_true')

    test_data = load_test_data(exp)
    test_data.transform = transforms.ToTensor()

    largest_nfe = groups.nfe.nlargest(n)
    high_nfe_idxs = largest_nfe.index.get_level_values(1)
    high_nfe_images = torch.stack([test_data[i][0] for i in high_nfe_idxs])
    high_nfe_grid = make_grid(high_nfe_images, nrow=n)

    smallest_nfe = groups.nfe.nsmallest(n).reset_index().sort_values(['y_true', 'nfe'], ascending=[True, False])
    low_nfe_idxs = smallest_nfe.level_1  # nsmallest in reverse order
    low_nfe_images = torch.stack([test_data[i][0] for i in low_nfe_idxs])
    low_nfe_grid = make_grid(low_nfe_images, nrow=n)
    smallest_nfe = smallest_nfe.nfe

    grid_h = low_nfe_grid.shape[1]
    img_pad = torch.zeros((3, grid_h, pad))
    grid = torch.cat((high_nfe_grid, img_pad, low_nfe_grid), 2)

    plt.imshow(np.transpose(grid.numpy(), (1, 2, 0)))  # , interpolation='nearest')
    for i, (l, s) in enumerate(zip(largest_nfe, smallest_nfe)):
        y, x = (i // n), (i % n)
        y, x = (np.array((y, x)) + (0.8, 0.75)) * side
        text = plt.text(x, y, str(int(l)), fontsize=5, ha='left', va='top', color='white')
        text.set_path_effects([patheffects.Stroke(linewidth=1, foreground='black'), patheffects.Normal()])

        disp = side * n + pad + 6
        text = plt.text(x + disp, y, str(int(s)), fontsize=5, ha='left', va='top', color='white')
        text.set_path_effects([patheffects.Stroke(linewidth=1, foreground='black'), patheffects.Normal()])

    ax = plt.gca()
    h, _ = ax.get_ylim()
    ticks = (np.arange(10) / 10 + 1 / (2 * 10)) * h
    plt.yticks(ticks, labels)

    plt.xticks([])
    h, htxt = -0.03, -0.08
    ax.annotate('', xy=(0, h), xycoords='axes fraction', xytext=(1, h), arrowprops=dict(arrowstyle="<->", color='k'))
    ax.annotate('high NFE', xy=(0, htxt), xycoords='axes fraction', xytext=(0, htxt))
    ax.annotate('low NFE', xy=(1, htxt), xycoords='axes fraction', xytext=(0.87, htxt))

    plt.savefig(args.output2, bbox_inches="tight")
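
# make_grid (torchvision.utils) tiles a batch of image tensors into one image
# with a default padding of 2 px, which is why side is incremented by 2 above.
# A toy version of the grid assembly and horizontal concatenation used here:

import torch
from torchvision.utils import make_grid

images = torch.rand(10, 3, 32, 32)            # N x C x H x W
grid = make_grid(images, nrow=5)              # rows of 5, 2 px default padding
spacer = torch.zeros((3, grid.shape[1], 20))  # horizontal gap between grids
combined = torch.cat((grid, spacer, grid), 2)
print(combined.shape)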
# Example 7
def main(args):
    exp = Experiment.from_dir(args.run, main='model')
    params = next(exp.params.itertuples())

    # data setup
    transform = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Lambda(lambda x: x.numpy())])

    preproc = utils.PREPROC[params.dataset]
    if params.dataset == 'mnist':
        data = MNIST('data/mnist',
                     download=True,
                     train=False,
                     transform=transform)
    elif params.dataset == 'cifar10':
        data = CIFAR10('data/cifar10',
                       download=True,
                       train=False,
                       transform=transform)
        preproc = map(lambda x: np.array(x).reshape((3, 1, 1)), preproc)  # expand dimensions
        preproc = tuple(preproc)

    # model setup
    model = utils.load_model(exp).eval().cuda()
    if args.tol is None:
        args.tol = params.tol

    if params.model == 'odenet':
        model.odeblock.tol = args.tol

    fmodel = foolbox.models.PyTorchModel(model,
                                         bounds=(0, 1),
                                         num_classes=10,
                                         preprocessing=preproc)

    # attack setup
    if args.distance == 2:
        attack = foolbox.attacks.L2BasicIterativeAttack
        distance = foolbox.distances.MSE
    elif args.distance == float('inf'):
        attack = foolbox.attacks.LinfinityBasicIterativeAttack
        distance = foolbox.distances.Linf

    attack = attack(fmodel, distance=distance)

    sub_exp_root = exp.path_to('adv-attack')
    os.makedirs(sub_exp_root, exist_ok=True)

    sub_exp = Experiment(args, root=sub_exp_root, ignore=('run', ))
    print(sub_exp)
    results_file = sub_exp.path_to('results.csv')
    results = pd.read_csv(results_file) if os.path.exists(results_file) else pd.DataFrame()

    # perform attack
    progress = tqdm(data)
    for i, (image, label) in enumerate(progress):
        if not results.empty and i in results.sample_id.values:
            continue

        if not isinstance(label, int):
            label = label.item()

        start = time.time()
        adversarial = attack(image,
                             label,
                             unpack=False,
                             binary_search=False,
                             stepsize=args.stepsize,
                             epsilon=args.epsilon)

        elapsed = time.time() - start
        result = pd.DataFrame(dict(
            sample_id=i,
            label=label,
            elapsed_time=elapsed,
            distance=adversarial.distance.value,
            adversarial_class=adversarial.adversarial_class,
            original_class=adversarial.original_class,
        ), index=[0])

        results = results.append(result, ignore_index=True)
        results.to_csv(results_file, index=False)

        success = ~results.adversarial_class.isna()
        successes = success.sum()
        success_rate = success.mean()

        progress.set_postfix({
            'success_rate': f'{success_rate:.2%} ({successes}/{len(success)})'
        })
def retrieval(args):
    exp = Experiment.from_dir(args.run, main='model')
    features_file = exp.path_to('features.h5')
    results_file = exp.path_to('retrieval.csv')

    assert os.path.exists(features_file), f"No pre-extracted features found: {features_file}"

    all_results = pd.DataFrame()
    # check if results exist and are up to date; if so, load them
    # (already-computed entries may be skipped later)
    if (os.path.exists(results_file)
            and os.path.getctime(results_file) >= os.path.getctime(features_file)
            and not args.force):
        all_results = pd.read_csv(results_file)

    params = next(exp.params.itertuples())

    with h5py.File(features_file, 'r') as f:
        features = f['features'][...]
        y_true = f['y_true'][...]
        if params.model == 'odenet':
            t1s = f['t1s'][...]

    features /= np.linalg.norm(features, axis=-2, keepdims=True)

    queries = features  # all queries

    # number of samples and queries (the first dimension might be t1)
    n_samples = features.shape[-2]
    n_queries = queries.shape[-2]

    # gt per query in each row: gt[i, j] is True iff query i and sample j share a label
    gt = (np.broadcast_to(y_true, (n_queries, n_samples))
          == y_true[:n_queries].reshape(-1, 1))

    def score(queries, db, gt):
        scores = queries.dot(db.T)
        aps = [
            average_precision_score(gt[i], scores[i])
            for i in trange(n_queries)
        ]
        return np.mean(aps)

    if params.model == 'odenet':
        for i, t1 in enumerate(tqdm(t1s)):
            # TODO check and skip
            mean_ap_asym = score(queries[i], features[-1], gt)  # t1 = 1 for db
            mean_ap_sym = score(queries[i], features[i], gt)  # same t1 for queries and db
            results = {
                't1': t1,
                'mean_ap_asym': mean_ap_asym,
                'mean_ap_sym': mean_ap_sym
            }
            all_results = all_results.append(results, ignore_index=True)
            all_results.to_csv(results_file, index=False)
    else:  # resnet
        mean_ap = score(features, features, gt)
        all_results = all_results.append({'mean_ap': mean_ap},
                                         ignore_index=True)
        all_results.to_csv(results_file, index=False)
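
# score computes mean average precision from dot products between normalized
# query and database vectors. The same computation on a toy, self-contained
# example (per-vector normalization is used here for illustration):

import numpy as np
from sklearn.metrics import average_precision_score

rng = np.random.default_rng(0)
db = rng.normal(size=(50, 8))
db /= np.linalg.norm(db, axis=1, keepdims=True)  # unit-normalize each vector
labels = rng.integers(0, 5, size=50)

queries = db                             # every sample queries the database
gt = labels[:, None] == labels[None, :]  # gt[i, j]: do i and j share a label?
scores = queries.dot(db.T)               # cosine similarities

mean_ap = np.mean([average_precision_score(gt[i], scores[i])
                   for i in range(len(queries))])
print(f'mAP: {mean_ap:.3f}')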
def features(args):
    run = Experiment.from_dir(args.run, main='model')
    print(run)

    params = next(run.params.itertuples())

    features_file = 'features.h5' if args.data is None else f'features-{args.data}.h5'
    features_file = run.path_to(features_file)
    results_file = run.path_to('results')
    dependency_file = run.ckpt('best')

    if (os.path.exists(features_file)
            and os.path.getctime(features_file) >= os.path.getctime(dependency_file)
            and not args.force):
        print('Skipping...')
        sys.exit(0)

    if args.data == 'tiny-imagenet-200':
        transfer_transform = transforms.Compose([
            transforms.Resize(32),
            transforms.ToTensor(),
            transforms.Normalize((0.4914, 0.4822, 0.4465),
                                 (0.2023, 0.1994, 0.2010)),
        ])
        test_data = TinyImageNet200('data/tiny-imagenet-200',
                                    split='val',
                                    transform=transfer_transform)
    else:
        test_data = load_test_data(run)
    test_loader = DataLoader(test_data,
                             batch_size=params.batch_size,
                             shuffle=False)

    model = load_model(run)
    model = model.to(args.device)
    model.eval()
    model.to_features_extractor()

    if params.model == 'odenet':
        if os.path.exists(results_file):  # reuse t1s if already tested
            results = pd.read_csv(results_file)
            results = results[results.t1 <= 1]
            t1s = results.t1.sort_values().unique()
        else:
            t1s = np.arange(.05, 1.05, .05)  # from 0.05 to 1.0 in steps of 0.05

        model.odeblock.t1 = t1s.tolist()
        if 'ode' in params.downsample:
            model.downsample.odeblock.t1 = t1s.tolist()

        t1s = np.insert(t1s, 0, 0)  # add 0 at the beginning

    features = []
    y_true = []
    with torch.no_grad():
        for x, y in tqdm(test_loader):
            x = x.to(args.device)
            y_true.append(y.numpy())

            f = model(x)
            f = f.cpu().numpy()

            features.append(f)

    features = np.concatenate(features, -2)  # concat along batch dimension
    y_true = np.concatenate(y_true)

    with h5py.File(features_file, 'w') as f:
        f['features'] = features
        f['y_true'] = y_true
        if params.model == 'odenet':
            f['t1s'] = t1s
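
# model.to_features_extractor() is assumed to truncate the network so the
# forward pass returns pre-classifier features (one feature vector per
# integration time when multiple t1s are set on the ODE block). A hedged
# sketch of such a conversion for a generic backbone-plus-head model:

import torch
import torch.nn as nn


class ToyNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.backbone = nn.Sequential(nn.Flatten(), nn.Linear(32, 16), nn.ReLU())
        self.fc = nn.Linear(16, 10)

    def forward(self, x):
        return self.fc(self.backbone(x))

    def to_features_extractor(self):
        # drop the classification head: forward now yields 16-d features
        self.fc = nn.Identity()


net = ToyNet()
net.to_features_extractor()
print(net(torch.randn(4, 32)).shape)  # torch.Size([4, 16])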
# Example 10
def accuracy(args):
    run = Experiment.from_dir(args.run, main='model')
    print(run)
    results_file = run.path_to('results')
    best_ckpt_file = run.ckpt('best')

    all_results = pd.DataFrame()
    # check if results exist and are up to date; if so, load them
    # (already-computed tolerances are skipped later)
    if (os.path.exists(results_file)
            and os.path.getctime(results_file) >= os.path.getctime(best_ckpt_file)
            and not args.force):
        all_results = pd.read_csv(results_file)

    params = next(run.params.itertuples())

    test_data = load_test_data(run)
    test_loader = DataLoader(test_data,
                             batch_size=params.batch_size,
                             shuffle=False)

    model = load_model(run)
    model = model.to(args.device)
    model.eval()

    t1 = torch.arange(0, 1.05, .05)  # from 0 to 1 w/ .05 step
    model.odeblock.t1 = t1[1:]  # 0 is implicit
    model.odeblock.return_last_only = False

    if params.downsample == 'ode2':
        model.downsample.odeblock.t1 = t1[1:]  # 0 is implicit
        model.downsample.odeblock.return_last_only = False
        model.downsample.odeblock.apply_conv = True
        t1 = torch.cat((t1, t1))

    T = len(t1)

    def _evaluate(loader, model, tol, args):
        model.odeblock.tol = tol
        if 'ode' in params.downsample:
            model.downsample.odeblock.tol = tol

        n_batches = 0
        n_processed = 0
        nfe_forward = 0

        n_correct = torch.zeros(T)
        tot_losses = torch.zeros(T)

        progress = tqdm(loader)
        for x, y in progress:
            x, y = x.to(args.device), y.to(args.device)
            p = model(x)  # timestamps (T) x batch (N) x classes (C)
            nfe_forward += model.nfe(reset=True)
            pp = p.permute(1, 2, 0)  # N x C x T
            yy = y.unsqueeze(1).expand(-1, T)  # N x T
            losses = F.cross_entropy(pp, yy, reduction='none')  # N x T

            tot_losses += losses.sum(0).cpu()

            yy = y.unsqueeze(0).expand(T, -1)
            n_correct += (yy == p.argmax(dim=-1)).sum(-1).float().cpu()
            n_processed += y.shape[0]
            n_batches += 1

            # logloss = losses.item() / n_processed
            # accuracy = n_correct / n_processed
            nfe = nfe_forward / n_batches
            metrics = {
                # 'loss': f'{logloss:4.3f}',
                # 'acc': f'{n_correct:4d}/{n_processed:4d} ({accuracy:.2%})',
                'nfe': f'{nfe:3.1f}'
            }
            progress.set_postfix(metrics)

        loglosses = tot_losses / n_processed
        accuracies = n_correct / n_processed

        metrics = {
            't1': t1.numpy(),
            'test_loss': loglosses.numpy(),
            'test_acc': accuracies.numpy(),
            'test_nfe': [nfe] * T,
            'test_tol': [tol] * T
        }

        return metrics

    progress = tqdm(args.tol)
    with torch.no_grad():
        for tol in progress:
            progress.set_postfix({'tol': tol})

            if 'test_tol' in all_results.columns and (all_results.test_tol == tol).any():
                progress.write(f'Skipping: tol={tol:g}')
                continue

            results = _evaluate(test_loader, model, tol, args)
            results = pd.DataFrame(results)
            all_results = all_results.append(results, ignore_index=True)
            all_results.to_csv(results_file, index=False)
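
# With return_last_only=False the model emits one prediction per integration
# time, so p has shape (T, N, C). Permuting to (N, C, T) lets a single
# F.cross_entropy call score every timestamp at once, because the class
# dimension must be dim 1. A shape-checking toy example:

import torch
import torch.nn.functional as F

T, N, C = 21, 4, 10
p = torch.randn(T, N, C)           # timestamps x batch x classes
y = torch.randint(0, C, (N,))

pp = p.permute(1, 2, 0)            # N x C x T
yy = y.unsqueeze(1).expand(-1, T)  # N x T
losses = F.cross_entropy(pp, yy, reduction='none')
print(losses.shape)                # torch.Size([4, 21]): one loss per (sample, t)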
# Example 11
def tradeoff(args):
    run = Experiment.from_dir(args.run, main='model')
    print(run)
    results_file = run.path_to('tradeoff.csv')
    best_ckpt_file = run.ckpt('best')

    results = pd.DataFrame()
    # check if results exist and are up to date; if so, load them
    # (already-computed settings are skipped later)
    if (os.path.exists(results_file)
            and os.path.getctime(results_file) >= os.path.getctime(best_ckpt_file)
            and not args.force):
        results = pd.read_csv(results_file)

    params = next(run.params.itertuples())

    test_data = load_test_data(run)
    test_loader = DataLoader(test_data,
                             batch_size=params.batch_size,
                             shuffle=False)

    model = load_model(run)
    model = model.to(args.device)
    model.eval()

    def _evaluate(loader, model, t1, tol, args):
        model.odeblock.t1 = t1
        model.odeblock.tol = tol

        n_correct = 0
        n_batches = 0
        n_processed = 0
        nfe_forward = 0
        tot_loss = 0.

        progress = tqdm(loader)
        for x, y in progress:
            x, y = x.to(args.device), y.to(args.device)
            p = model(x)
            nfe_forward += model.nfe(reset=True)
            loss = F.cross_entropy(p, y, reduction='sum')
            tot_loss += loss.item()

            n_correct += (y == p.argmax(dim=1)).sum().item()
            n_processed += y.shape[0]
            n_batches += 1

            logloss = tot_loss / n_processed
            accuracy = n_correct / n_processed
            nfe = nfe_forward / n_batches
            metrics = {
                'loss': f'{logloss:4.3f}',
                'acc': f'{n_correct:4d}/{n_processed:4d} ({accuracy:.2%})',
                'nfe': f'{nfe:3.1f}'
            }
            progress.set_postfix(metrics)

        metrics = {
            't1': t1,
            'test_loss': logloss,
            'test_acc': accuracy,
            'test_nfe': nfe,
            'test_tol': tol
        }
        return metrics

    progress = tqdm(itertools.product(args.tol, args.t1))
    for tol, t1 in progress:
        if ('t1' in results.columns and 'test_tol' in results.columns
                and ((results.t1 == t1) & (results.test_tol == tol)).any()):
            print(f'Skipping tol={tol} t1={t1} ...')
            continue

        progress.set_postfix({'tol': tol, 't1': t1})
        result = _evaluate(test_loader, model, t1, tol, args)
        results = results.append(result, ignore_index=True)
        results.to_csv(results_file, index=False)
# Example 12
def nfe(args):
    assert Experiment.is_exp_dir(args.run), f"Not a run dir: {args.run}"
    run = Experiment.from_dir(args.run, main='model')
    nfe = pd.read_csv(run.path_to('nfe.csv'), index_col=0)
    sns.boxplot(x='y_true', y='nfe', data=nfe)
    plt.savefig(args.output, bbox_inches="tight")
# Example 13
def main(args):
    exp = Experiment.from_dir(args.run, main='model')
    params = next(exp.params.itertuples())

    # data setup
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Lambda(lambda x: x.numpy())
    ])

    preproc = utils.PREPROC[params.dataset]
    if params.dataset == 'mnist':
        data = MNIST('data/mnist', download=True, train=False, transform=transform)
    elif params.dataset == 'cifar10':
        data = CIFAR10('data/cifar10', download=True, train=False, transform=transform)
        preproc = map(lambda x: np.array(x).reshape((3, 1, 1)), preproc)  # expand dimensions
        preproc = tuple(preproc)

    t = np.linspace(0, 1, args.resolution + 1).tolist()

    # model setup
    model = utils.load_model(exp).eval().cuda()
    extractor = utils.load_model(exp).eval().cuda()
    extractor.to_features_extractor(keep_pool=False)
    extractor.odeblock.t1 = t

    if args.tol is None:
        args.tol = params.tol

    if params.model == 'odenet':
        model.odeblock.tol = args.tol
        extractor.odeblock.tol = args.tol

    fmodel = foolbox.models.PyTorchModel(model, bounds=(0, 1), num_classes=10, preprocessing=preproc)

    # attack setup
    if args.distance == 2:
        attack = foolbox.attacks.L2BasicIterativeAttack
        distance = foolbox.distances.MSE
    elif args.distance == float('inf'):
        attack = foolbox.attacks.LinfinityBasicIterativeAttack
        distance = foolbox.distances.Linf

    attack = attack(fmodel, distance=distance)

    sub_exp_root = exp.path_to('adv-attack')
    os.makedirs(sub_exp_root, exist_ok=True)

    sub_exp = Experiment(args, root=sub_exp_root, ignore=('run', 'resolution'))
    print(sub_exp)
    results_file = sub_exp.path_to('results.csv')
    diff_l2_file = sub_exp.path_to('diff_l2.csv')
    diff_cos_file = sub_exp.path_to('diff_cos.csv')

    if not os.path.exists(results_file):
        print('No results on attacks found:', results_file)
        return

    results = pd.read_csv(results_file).set_index('sample_id')

    diff_l2 = pd.read_csv(diff_l2_file) if os.path.exists(diff_l2_file) else pd.DataFrame()
    diff_cos = pd.read_csv(diff_cos_file) if os.path.exists(diff_cos_file) else pd.DataFrame()
    diff_cols = ['sample_id'] + t

    progress = tqdm(data)
    for i, (image, label) in enumerate(progress):
        
        if (not diff_l2.empty and not diff_cos.empty and
            i in diff_l2.sample_id.values and i in diff_cos.sample_id.values):
            continue  # skipping, already computed

        perturbation_distance = results.at[i, 'distance']
        if perturbation_distance == 0 or not np.isfinite(perturbation_distance):
            continue  # skipping natural errors or not-found adversarials

        if not isinstance(label, int):
            label = label.item()

        start = time.time()
        adversarial = attack(image, label, unpack=False, binary_search=False, epsilon=args.epsilon)
        elapsed = time.time() - start

        if adversarial.perturbed is None:
            tqdm.write(f'WARN: adversarial not found when reproducing [sample_id = {i}]')
            continue

        with torch.no_grad():
            original_image = torch.from_numpy(adversarial.unperturbed).cuda()
            original_traj = extractor(original_image.unsqueeze(0))

            adversarial_image = torch.from_numpy(adversarial.perturbed).cuda()
            adversarial_traj = extractor(adversarial_image.unsqueeze(0))

        adversarial_traj = adversarial_traj.reshape(args.resolution + 1, -1)
        original_traj = original_traj.reshape(args.resolution + 1, -1)

        """ L2 """
        diff_traj = adversarial_traj - original_traj
        diff_traj = (diff_traj ** 2).sum(1).sqrt()
        diff_traj = diff_traj.cpu().numpy()
        tmp = pd.DataFrame([[i] + diff_traj.tolist()], columns=diff_cols)
        diff_l2 = diff_l2.append(tmp, ignore_index=True)
        diff_l2.to_csv(diff_l2_file, index=False)
        
        """ Cosine similarity """
        diff_traj = F.cosine_similarity(adversarial_traj, original_traj)
        diff_traj = diff_traj.cpu().numpy()
        tmp = pd.DataFrame([[i] + diff_traj.tolist()], columns=diff_cols)
        diff_cos = diff_cos.append(tmp, ignore_index=True)
        diff_cos.to_csv(diff_cos_file, index=False)
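
# The trajectory comparison flattens the extractor outputs to
# (resolution + 1, D) and measures, at each integration time, the L2 distance
# and cosine similarity between clean and adversarial feature trajectories.
# The same arithmetic on toy tensors:

import torch
import torch.nn.functional as F

steps, D = 11, 64
original_traj = torch.randn(steps, D)
adversarial_traj = original_traj + 0.1 * torch.randn(steps, D)

l2 = (adversarial_traj - original_traj).pow(2).sum(1).sqrt()  # (steps,)
cos = F.cosine_similarity(adversarial_traj, original_traj)    # (steps,)
print(l2.shape, cos.shape)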
# Example 14
def retrieval(args):
    exp = Experiment.from_dir(args.run, main='model')

    features_file = 'features.h5' if args.data is None else f'features-{args.data}.h5'
    results_file = 'retrieval.csv' if args.data is None else f'retrieval-{args.data}.csv'

    features_file = exp.path_to(features_file)
    results_file = exp.path_to(results_file)

    assert os.path.exists(features_file), f"No pre-extracted features found: {features_file}"

    all_results = pd.DataFrame()
    # check if results exist and are up to date; if so, load them
    # (already-computed entries may be skipped later)
    if (os.path.exists(results_file)
            and os.path.getctime(results_file) >= os.path.getctime(features_file)
            and not args.force):
        all_results = pd.read_csv(results_file, float_precision='round_trip')

    with h5py.File(features_file, 'r') as f:
        features = f['features'][...]
        y_true = f['y_true'][...]
        t1s = f['t1s'][...]

    features /= np.linalg.norm(features, axis=-2, keepdims=True) + 1e-7

    queries = features  # all queries

    # number of samples and queries (the first dimension might be t1)
    n_samples = features.shape[-2]
    n_queries = queries.shape[-2]

    # gt per query in each row: gt[i, j] is True iff query i and sample j share a label
    gt = (np.broadcast_to(y_true, (n_queries, n_samples))
          == y_true[:n_queries].reshape(-1, 1))

    def score(queries, db, gt, k=None):
        scores = queries.dot(db.T)
        if k is None:  # average precision
            aps = [
                average_precision_score(gt[i], scores[i])
                for i in trange(n_queries)
            ]
        else:  # average precision at k
            ranking = scores.argsort(axis=1)[:, ::-1][:, :k]  # top-k indexes per query
            ranked_scores = scores[np.arange(n_queries)[:, np.newaxis], ranking]
            ranked_gt = gt[np.arange(n_queries)[:, np.newaxis], ranking]
            aps = [
                average_precision_score(ranked_gt[i], ranked_scores[i])
                for i in trange(n_queries)
            ]  # avg. prec. @ k

        return aps

    for i, t1 in enumerate(tqdm(t1s)):
        # TODO check and skip
        ap_asym = score(queries[i], features[-1], gt)  # t1 = 1 for db
        ap_sym = score(queries[i], features[i], gt)  # same t1 for queries and db

        ap10_asym = score(queries[i], features[-1], gt, k=10)
        ap10_sym = score(queries[i], features[i], gt, k=10)

        results = pd.DataFrame({
            'ap_asym': ap_asym,
            'ap_sym': ap_sym,
            'ap10_asym': ap10_asym,
            'ap10_sym': ap10_sym
        })
        results['t1'] = t1
        all_results = all_results.append(results, ignore_index=True)
        all_results.to_csv(results_file, index=False)
# Example 15
def features(args):
    run = Experiment.from_dir(args.run, main='model')
    print(run)

    params = next(run.params.itertuples())

    features_file = 'features.h5' if args.data is None else f'features-{args.data}.h5'
    features_file = run.path_to(features_file)
    dependency_file = run.ckpt('best')

    if (os.path.exists(features_file)
            and os.path.getctime(features_file) >= os.path.getctime(dependency_file)
            and not args.force):
        print('Features file already exists, skipping...')
        sys.exit(0)

    if args.data == 'cifar10':
        # transfer setting: cifar10 on a tiny-imagenet-200-trained network;
        # resize to 64 and use tiny-imagenet-200 normalization
        transfer_transform = transforms.Compose([
            transforms.Resize(64),
            transforms.ToTensor(),
            transforms.Normalize((0.4802, 0.4481, 0.3975),
                                 (0.2770, 0.2691, 0.2821)),
        ])
        test_data = CIFAR10('data/cifar10',
                            download=True,
                            train=False,
                            transform=transfer_transform)
    elif args.data == 'tiny-imagenet-200':
        transfer_transform = transforms.Compose([
            transforms.Resize(32),
            transforms.ToTensor(),
            transforms.Normalize((0.4914, 0.4822, 0.4465),
                                 (0.2023, 0.1994, 0.2010)),
        ])
        test_data = TinyImageNet200('data/tiny-imagenet-200',
                                    split='val',
                                    transform=transfer_transform)
    else:
        test_data = load_test_data(run)
    test_loader = DataLoader(test_data,
                             batch_size=params.batch_size,
                             shuffle=False)

    model = load_model(run)
    model = model.to(args.device)
    model.eval()
    model.to_features_extractor()

    if params.model == 'odenet':
        model.odeblock.t1 = args.t1
        if 'ode' in params.downsample:
            model.downsample.odeblock.t1 = args.t1
    else:
        args.t1 = np.linspace(0, 1, 7)  # = 1 input + 6 resblocks' outputs
        args.tol = [0]

    tols = np.array(args.tol)
    t1s = np.array(args.t1)
    features = []
    y_true = []

    with torch.no_grad():
        y_true = [y.numpy() for _, y in tqdm(test_loader)]
        y_true = np.concatenate(y_true)

        for tol in tqdm(tols):
            if params.model == "odenet":
                model.odeblock.tol = tol

            f = [
                model(x.to(args.device)).cpu().numpy()
                for x, _ in tqdm(test_loader)
            ]
            f = np.concatenate(f, -2)  # concat along batch dimension

            features.append(f)

        features = np.stack(features)

    with h5py.File(features_file, 'w') as f:
        f['features'] = features
        f['y_true'] = y_true
        f['tols'] = tols
        f['t1s'] = t1s
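
# A read-back sketch for the file written above (dataset names as saved here;
# exact array shapes depend on the model, the tolerance grid, and the t1 grid):

import h5py

features_file = 'features.h5'  # e.g. the file written by features() above

with h5py.File(features_file, 'r') as f:
    features = f['features'][...]
    y_true = f['y_true'][...]
    tols = f['tols'][...]
    t1s = f['t1s'][...]

print(features.shape, y_true.shape, tols.shape, t1s.shape)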