Code Example #1
    def test_s_test_sample(self):

        estimated_ihvp = s_test_sample(
            self.model,
            self.x_test,
            self.y_test,
            self.train_loader,
            gpu=self.gpu,
            damp=0.0,
            r=10,
            recursion_depth=10_000,
            batch_size=500,
        )

        flat_estimated_ihvp = parameters_to_vector(estimated_ihvp)

        print("LiSSA")
        self.assertTrue(self.check_estimation(flat_estimated_ihvp))

        print("Influence")
        inf_app, inf_rea = [], []
        for i, (x_train, y_train) in enumerate(
                self.model.train_dataloader(batch_size=1, shuffle=False)):
            grads_train = grad_z(x_train, y_train, self.model, gpu=self.gpu)
            flat_grads_train = parameters_to_vector(grads_train)
            inf_app.append(-torch.sum(flat_grads_train * flat_estimated_ihvp /
                                      len(self.model.training_set)).item())
            inf_rea.append(-torch.sum(flat_grads_train * self.real_ihvp /
                                      len(self.model.training_set)).item())
        np.save("influence.npy", {'inf_app': inf_app, 'inf_rea': inf_rea})
Code Example #2
def calc_grad_z(model, train_loader, save_pth=False, gpu=-1, start=0):
    """Calculates grad_z and can save the output to files. One grad_z should
    be computed for each training data sample.

    Arguments:
        model: pytorch model, for which grad_z should be calculated
        train_loader: pytorch dataloader, which can load the train data
        save_pth: Path, path where to save the grad_z files if desired.
            Omitting this argument will skip saving
        gpu: int, device id to use for GPU, -1 for CPU (default)
        start: int, index of the first training sample to use. default is 0

    Returns:
        grad_zs: list of torch tensors, contains the grad_z tensors
        save_pth: Path, path where grad_z files were saved to or
            False if they were not saved."""
    if save_pth and isinstance(save_pth, str):
        save_pth = Path(save_pth)
    if not save_pth:
        logging.info("ATTENTION: Not saving grad_z files!")

    grad_zs = []
    for i in range(start, len(train_loader.dataset)):
        z, t = train_loader.dataset[i]
        z = train_loader.collate_fn([z])
        t = train_loader.collate_fn([t])
        grad_z_vec = grad_z(z, t, model, gpu=gpu)
        if save_pth:
            grad_z_vec = [g.cpu() for g in grad_z_vec]
            torch.save(grad_z_vec, save_pth.joinpath(f"{i}.grad_z"))
        else:
            grad_zs.append(grad_z_vec)
        display_progress("Calc. grad_z: ", i - start,
                         len(train_loader.dataset) - start)

    return grad_zs, save_pth
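
A hypothetical usage sketch; `model` and `train_loader` are assumed to exist as in the surrounding tests, and the `grads/` directory is assumed to have been created:

from pathlib import Path

import torch

# Write one gradient file per training sample under grads/.
grad_zs, out_dir = calc_grad_z(model, train_loader,
                               save_pth=Path("grads"), gpu=-1)

# With save_pth set, grad_zs stays empty; reload a single gradient with:
grad_z_0 = torch.load(Path("grads").joinpath("0.grad_z"))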
Code Example #3
def calc_influence_single(
    model,
    train_loader,
    test_loader,
    test_id_num,
    gpu,
    recursion_depth,
    r,
    s_test_vec=None,
    time_logging=False,
):
    """Calculates the influences of all training data points on a single
    test dataset image.

    Arguments:
        model: pytorch model
        train_loader: DataLoader, loads the training dataset
        test_loader: DataLoader, loads the test dataset
        test_id_num: int, id of the test sample for which to calculate the
            influence function
        gpu: int, identifies the gpu id, -1 for cpu
        recursion_depth: int, number of recursions to perform during s_test
            calculation, increases accuracy. r*recursion_depth should equal the
            training dataset size.
        r: int, number of iterations of which to take the avg.
            of the h_estimate calculation; r*recursion_depth should equal the
            training dataset size.
        s_test_vec: list of torch tensor, contains s_test vectors. If not
            provided, it will be computed

    Returns:
        influence: list of float, influences of all training data samples
            for one test sample
        harmful: list of int, indices of training samples sorted by
            harmfulness (most harmful first)
        helpful: list of int, indices of training samples sorted by
            helpfulness (most helpful first)
        test_id_num: int, the number of the test dataset point
            the influence was calculated for"""
    # Calculate s_test vectors if not provided
    if s_test_vec is None:
        z_test, t_test = test_loader.dataset[test_id_num]
        z_test = test_loader.collate_fn([z_test])
        t_test = test_loader.collate_fn([t_test])
        s_test_vec = s_test_sample(
            model,
            z_test,
            t_test,
            train_loader,
            gpu,
            recursion_depth=recursion_depth,
            r=r,
        )

    # Calculate the influence function
    train_dataset_size = len(train_loader.dataset)
    influences = []
    for i in tqdm(range(train_dataset_size)):
        z, t = train_loader.dataset[i]
        z = train_loader.collate_fn([z])
        t = train_loader.collate_fn([t])

        if time_logging:
            time_a = datetime.datetime.now()

        grad_z_vec = grad_z(z, t, model, gpu=gpu)

        if time_logging:
            time_b = datetime.datetime.now()
            time_delta = time_b - time_a
            logging.info(f"Time for grad_z iter:"
                         f" {time_delta.total_seconds() * 1000}")
        with torch.no_grad():
            tmp_influence = (
                -sum([
                    ####################
                    # TODO: potential bottle neck, takes 17% execution time
                    # torch.sum(k * j).data.cpu().numpy()
                    ####################
                    torch.sum(k * j).data
                    for k, j in zip(grad_z_vec, s_test_vec)
                ]) / train_dataset_size)

        influences.append(tmp_influence.item())
        # display_progress("Calc. influence function: ", i, train_dataset_size)

    harmful = np.argsort(influences)
    helpful = harmful[::-1]

    return influences, harmful.tolist(), helpful.tolist(), test_id_num
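
The loop above accumulates the influence from Koh & Liang: influence(z, z_test) = -(1/n) * grad L(z_test)^T H^{-1} grad L(z), with s_test standing in for H^{-1} grad L(z_test). A hypothetical call, assuming loaders like those in the tests:

# r * recursion_depth should roughly match the training set size.
influences, harmful, helpful, test_id = calc_influence_single(
    model, train_loader, test_loader,
    test_id_num=0, gpu=-1,
    recursion_depth=5000, r=2,
)
print("most harmful training indices:", harmful[:5])
print("most helpful training indices:", helpful[:5])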
Code Example #4
    def setUpClass(cls) -> None:
        pl.seed_everything(0)

        cls.n_features = 10

        cls.n_params = 2 * cls.n_features

        cls.model = LinearRegression(cls.n_features)

        gpus = 1 if torch.cuda.is_available() else 0
        trainer = pl.Trainer(gpus=gpus, max_epochs=10)
        # trainer.fit(cls.model)

        print(tuple(cls.model.parameters()))
        use_sklearn = True
        if use_sklearn:
            train_dataset = DummyDataset(cls.n_features)
            clf = SklearnLR()
            clf.fit(train_dataset.data, train_dataset.targets)

            with torch.no_grad():
                cls.model.linear.weight = torch.nn.Parameter(
                    torch.tensor([clf.coef_], dtype=torch.float))
                cls.model.linear.bias = torch.nn.Parameter(
                    torch.tensor([clf.intercept_], dtype=torch.float))

        cls.train_loader = cls.model.train_dataloader(batch_size=40000)

        # Setup test point data
        cls.test_idx = 8
        cls.x_test = torch.tensor([cls.model.test_set.data[[cls.test_idx]]],
                                  dtype=torch.float)
        cls.y_test = torch.tensor([cls.model.test_set.targets[[cls.test_idx]]],
                                  dtype=torch.float)

        # Compute estimated IHVP
        cls.gpu = 1 if torch.cuda.is_available() else -1

        # Compute and flatten grad
        grads = grad_z(cls.x_test, cls.y_test, cls.model, gpu=cls.gpu)
        flat_grads = parameters_to_vector(grads)

        print("Grads:")
        print(flat_grads)

        # Make model functional
        params, names = make_functional(cls.model)
        # Make params regular Tensors instead of nn.Parameter
        params = tuple(p.detach().requires_grad_() for p in params)
        flat_params = parameters_to_vector(params)

        # Initialize Hessian
        h = torch.zeros([flat_params.shape[0], flat_params.shape[0]])

        # Compute real IHVP
        for x_train, y_train in cls.train_loader:

            if cls.gpu >= 0:
                x_train, y_train = x_train.cuda(), y_train.cuda()

            def f(flat_params_):
                split_params = tensor_to_tuple(flat_params_, params)
                load_weights(cls.model, names, split_params)
                out = cls.model(x_train)
                loss = calc_loss(out, y_train)
                return loss

            batch_h = hessian(f, flat_params, strict=True)

            with torch.no_grad():
                h += batch_h / float(len(cls.train_loader))

        print("Hessian:")
        print(h)

        complete_x_train = cls.train_loader.dataset.data

        # Analytic Hessian of the MSE loss w.r.t. the weights: 2/n * X^T X
        real_hessian = (2 * complete_x_train.T @ complete_x_train /
                        complete_x_train.shape[0])

        print(real_hessian)

        # Compare against the weight-weight block of the autograd Hessian
        print(np.linalg.norm(
            real_hessian - h.cpu().numpy()[:cls.n_features, :cls.n_features]))

        np.save("hessian_pytorch.npy", h.cpu().numpy())

        # Restore the model to a regular `nn.Module`

        with torch.no_grad():
            load_weights(cls.model, names, params, as_params=True)
            inv_h = torch.inverse(h)
            print("Inverse Hessian")
            print(inv_h)
            cls.real_ihvp = inv_h @ flat_grads

        print("Real IHVP")
        print(cls.real_ihvp)
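
The analytic check above uses the fact that for the MSE loss L(w) = mean((Xw - y)^2) the Hessian with respect to the weights is 2/n * X^T X. A self-contained sketch of that identity (random shapes chosen purely for illustration):

import torch
from torch.autograd.functional import hessian

X = torch.randn(100, 10)
y = torch.randn(100)
w = torch.randn(10)

def mse(w_):
    return torch.mean((X @ w_ - y) ** 2)

# The autograd Hessian matches the closed form 2/n * X^T X.
H = hessian(mse, w)
assert torch.allclose(H, 2 * X.T @ X / X.shape[0], atol=1e-4)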
Code Example #5
    def setUpClass(cls) -> None:
        pl.seed_everything(0)

        cls.n_features = 10
        cls.n_classes = 3

        cls.n_params = cls.n_classes * cls.n_features + cls.n_features

        cls.wd = wd = 1e-2  # weight decay=1/(nC)
        cls.model = LogisticRegression(cls.n_classes,
                                       cls.n_features,
                                       wd=cls.wd)

        gpus = 1 if torch.cuda.is_available() else 0

        trainer = pl.Trainer(gpus=gpus, max_epochs=10)
        # trainer.fit(cls.model)

        use_sklearn = True
        if use_sklearn:
            cls.train_dataset = cls.model.training_set  # DummyDataset(cls.n_features, cls.n_classes)
            multi_class = "multinomial" if cls.model.n_classes != 2 else "auto"
            clf = SklearnLogReg(C=1 / len(cls.train_dataset) / wd,
                                tol=1e-8,
                                max_iter=1000,
                                multi_class=multi_class)

            clf.fit(cls.train_dataset.data, cls.train_dataset.targets)

            with torch.no_grad():
                cls.model.linear.weight = torch.nn.Parameter(
                    torch.tensor(clf.coef_, dtype=torch.float))
                cls.model.linear.bias = torch.nn.Parameter(
                    torch.tensor(clf.intercept_, dtype=torch.float))

        # Setup test point data
        cls.test_idx = 5
        cls.x_test = torch.tensor(cls.model.test_set.data[[cls.test_idx]],
                                  dtype=torch.float)
        cls.y_test = torch.tensor(cls.model.test_set.targets[[cls.test_idx]],
                                  dtype=torch.long)

        # Compute estimated IHVP
        cls.gpu = 1 if torch.cuda.is_available() else -1

        if cls.gpu >= 0:
            cls.model = cls.model.cuda()
            cls.x_test = cls.x_test.cuda()
            cls.y_test = cls.y_test.cuda()

        cls.train_loader = cls.model.train_dataloader(batch_size=40000)
        # Compute and flatten grad
        grads = grad_z(cls.x_test, cls.y_test, cls.model, gpu=cls.gpu)
        flat_grads = parameters_to_vector(grads)

        print("Grads:")
        print(flat_grads)

        # Make model functional
        params, names = make_functional(cls.model)
        # Make params regular Tensors instead of nn.Parameter
        params = tuple(p.detach().requires_grad_() for p in params)
        flat_params = parameters_to_vector(params)

        # Initialize Hessian
        h = torch.zeros([flat_params.shape[0], flat_params.shape[0]])
        if cls.gpu >= 0:
            h = h.cuda()

        # Compute real IHVP
        for x_train, y_train in cls.train_loader:

            if cls.gpu >= 0:
                x_train, y_train = x_train.cuda(), y_train.cuda()

            f = make_loss_f(cls.model, params, names, x_train, y_train, wd=wd)

            batch_h = hessian(f, flat_params, strict=True)

            with torch.no_grad():
                h += batch_h / float(len(cls.train_loader))

        # Symmetrize to remove numerical asymmetry from autograd
        h = (h + h.transpose(0, 1)) / 2
        print("Hessian:")
        print(h)

        np.save("hessian_pytorch.npy", h.cpu().numpy())
        # The symmetrized Hessian must be positive definite for the inverse
        # (and thus the influence computation) to be well-posed.
        ei = np.linalg.eigvalsh(h.cpu().numpy())
        print("eigenvalues =", ei)
        print("max, min eigenvalue =", ei.max(), ei.min())
        assert ei.min() > 0, "Error: non-positive eigenvalues"

        # Restore the model to a regular `nn.Module`

        with torch.no_grad():
            load_weights(cls.model, names, params, as_params=True)
            inv_h = torch.inverse(h)
            print("Inverse Hessian")
            print(inv_h)
            cls.real_ihvp = inv_h @ flat_grads

        print("Real IHVP")
        print(cls.real_ihvp)
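
The `C` passed to SklearnLogReg above comes from matching objectives: sklearn minimizes (1/2)||w||^2 + C * sum_i loss_i, while the torch model is assumed to minimize the mean loss plus (wd/2)||w||^2. Dividing sklearn's objective by C*n turns the penalty into (1/(2*C*n))||w||^2, so the two agree exactly when C = 1/(n * wd). A short sanity sketch with hypothetical values:

n, wd = 1000, 1e-2  # hypothetical training-set size and weight decay
C = 1 / (n * wd)
# The effective weight decay implied by sklearn's C is 1/(n*C) = wd.
assert abs(1 / (n * C) - wd) < 1e-12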
Code Example #6
def calc_influence_single(
    model,
    train_loader,
    test_loader,
    test_id_num,
    gpu,
    recursion_depth,
    r,
    damp=0.01,
    scale=25,
    s_test_vec=None,
    time_logging=False,
    exact=False,
    batch_size=1,
):
    """Calculates the influences of all training data points on a single
    test dataset image.

    Arguments:
        model: pytorch model
        train_loader: DataLoader, loads the training dataset
        test_loader: DataLoader, loads the test dataset
        test_id_num: int or list of int, id of the test samples for which to calculate the
            influence function
        gpu: int, identifies the gpu id, -1 for cpu
        recursion_depth: int, number of recursions to perform during s_test
            calculation, increases accuracy. r*recursion_depth should equal the
            training dataset size.
        r: int, number of iterations of which to take the avg.
            of the h_estimate calculation; r*recursion_depth should equal the
            training dataset size.
        damp: float, dampening factor used in the s_test (LiSSA) estimation
        scale: float, scaling factor used in the s_test (LiSSA) estimation
        s_test_vec: list of torch tensor, contains s_test vectors. If not
            provided, it will be computed
        time_logging: bool, if True, log the runtime of each grad_z call
        exact: bool, forwarded to s_test_sample to compute s_test exactly
            instead of with the LiSSA estimate
        batch_size: int, batch size to use during the s_test estimation

    Returns:
        influence: np.ndarray of float, influences of all training data
            samples for one test sample; each entry is the predicted change
            in the test loss after removing that training sample
        harmful: list of int, indices of training samples sorted by
            harmfulness (most harmful first)
        helpful: list of int, indices of training samples sorted by
            helpfulness (most helpful first)
        test_id_num: int or list of int, the id(s) of the test point(s)
            the influence was calculated for"""
    # Calculate s_test vectors if not provided
    if s_test_vec is None:
        if isinstance(test_id_num, int):
            test_id_num = [test_id_num]
        z_test, t_test = list(
            zip(*[test_loader.dataset[i] for i in test_id_num]))
        z_test = test_loader.collate_fn(z_test)
        t_test = test_loader.collate_fn(t_test)
        s_test_vec = s_test_sample(
            model,
            z_test,
            t_test,
            train_loader,
            gpu,
            recursion_depth=recursion_depth,
            r=r,
            damp=damp,
            scale=scale,
            exact=exact,
            batch_size=batch_size,
        )

    # Calculate the influence function
    train_dataset_size = len(train_loader.dataset)
    loss_diffs = []  # predicted value of new loss - original loss
    for i in tqdm(range(train_dataset_size)):
        z, t = train_loader.dataset[i]
        z = train_loader.collate_fn([z])
        t = train_loader.collate_fn([t])

        if time_logging:
            time_a = datetime.datetime.now()

        grad_z_vec = grad_z(z, t, model, gpu=gpu)

        if time_logging:
            time_b = datetime.datetime.now()
            time_delta = time_b - time_a
            logging.info(f"Time for grad_z iter:"
                         f" {time_delta.total_seconds() * 1000}")
        with torch.no_grad():
            tmp_loss_diff = (
                sum([
                    ####################
                    # TODO: potential bottle neck, takes 17% execution time
                    # torch.sum(k * j).data.cpu().numpy()
                    ####################
                    torch.sum(k * j).data
                    for k, j in zip(grad_z_vec, s_test_vec)
                ]) / train_dataset_size)

        loss_diffs.append(tmp_loss_diff.item())

    harmful = np.argsort(loss_diffs)
    helpful = harmful[::-1]

    return (np.array(loss_diffs), harmful.tolist(), helpful.tolist(),
            test_id_num)
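
A hypothetical call of this batched variant, averaging s_test over several test points (the model and loaders are assumed from the earlier examples):

loss_diffs, harmful, helpful, ids = calc_influence_single(
    model, train_loader, test_loader,
    test_id_num=[0, 1, 2], gpu=-1,
    recursion_depth=1000, r=5,
    damp=0.01, scale=25,
)
# loss_diffs[i] > 0: removing training point i is predicted to raise the
# test loss, i.e. the point is helpful; negative entries are harmful.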