Ejemplo n.º 1
0
    def eval_impl(cls, model, feed, curves=False, threshold=-0.5):
        """Compute the multi-output binary classification performance metrics.

        Parameters
        ----------
        model
            The model to evaluate; it is switched to evaluation mode with
            `model.eval()` before predictions are collected.
        feed
            The data source handed to `predict(model, feed)`.
        curves : bool, default=False
            Whether to also compute the pooled and the per-output
            precision-recall curves (they take a lot of space).
        threshold : float, default=-0.5
            The threshold forwarded to `named_sparsity`.

        Returns
        -------
        out : dict
            A dict with keys "sparsity", "accuracy", "precision", "recall",
            "average_precision", "pooled_average_precision" and "ap_curves".
            The accuracy, precision and recall entries are arrays with one
            value per output. "ap_curves" is empty unless `curves` is set.
        """
        out = {
            "sparsity":
            dict(named_sparsity(model, threshold=threshold, hard=True))
        }

        model.eval()
        y_true, y_pred, logits = predict(model, feed)

        # `y_true` must be 2d: one column per binary output
        _, n_outputs = y_true.shape

        # Get the binary confusion matrix of each output. The labels are
        #  pinned explicitly, since otherwise an output, for which only one
        #  class ever occurs among both the targets and the predictions,
        #  yields a 1x1 matrix, which breaks the stacking and the unpacking.
        # NOTE(review): this assumes the targets and predictions are encoded
        #  as 0/1 -- confirm against `predict`.
        (tn, fp), (fn, tp) = np.stack([
            confusion_matrix(y_true[:, j], y_pred[:, j], labels=[0, 1])
            for j in range(n_outputs)
        ], axis=-1)

        # Gotta compute'em all by hand! The denominators are clipped at one
        #  so that outputs with no positives get zero instead of a nan.
        out["accuracy"] = (tp + tn) / (tp + tn + fp + fn)  # ~ P(\hat{y} = y)
        out["precision"] = tp / np.maximum(tp + fp, 1)  # ~ P(y=1 \mid \hat{y}=1)
        out["recall"] = tp / np.maximum(tp + fn, 1)  # ~ P(\hat{y}=1 \mid y=1)

        # Raw per output average precision
        out["average_precision"] = average_precision_score(
            y_true, logits, pos_label=1, average=None)

        # Pooled AP (treating different outputs as one -- good?) -- slow
        out["pooled_average_precision"] = average_precision_score(
            y_true.ravel(), logits.ravel(), pos_label=1, average=None)

        # compute the curves: the key "pooled" holds the curve over all
        #  outputs flattened together, integer keys hold per-output curves
        out["ap_curves"] = {}
        if curves:
            y_prob = sigmoid(logits)
            out["ap_curves"]["pooled"] = precision_recall_curve(
                y_true.ravel(), y_prob.ravel())

            out["ap_curves"].update({
                j: precision_recall_curve(y_true[:, j], y_prob[:, j])
                for j in range(n_outputs)
            })
            # curves take too much space 2 x (3 x len x float64)!

        return out
Ejemplo n.º 2
0
    def eval_impl(cls, model, feed, threshold):
        """Compute the multiclass performance metrics.

        Parameters
        ----------
        model
            The model to evaluate; it is switched to evaluation mode with
            `model.eval()` before predictions are collected.
        feed
            The data source handed to `predict(model, feed)`.
        threshold : float
            The threshold forwarded to `named_sparsity`.

        Returns
        -------
        out : dict
            A dict with keys "sparsity", "confusion_matrix", "accuracy"
            (a scalar), "precision" and "recall" (one value per class).
        """
        out = {
            "sparsity":
            dict(named_sparsity(model, threshold=threshold, hard=True))
        }

        model.eval()
        y_true, y_pred, _ = predict(model, feed)  # the logits are not needed

        cm = confusion_matrix(y_true, y_pred)
        out["confusion_matrix"] = cm

        # Under the scikit-learn convention `cm[i, j]` counts the samples of
        #  true class `i` predicted as class `j`. Hence the COLUMN totals
        #  (axis=0) count the predictions of each class, and the ROW totals
        #  (axis=1) count the true members of each class.
        tp = cm.diagonal()
        fp = cm.sum(axis=0) - tp  # predicted as class k, but is not k
        fn = cm.sum(axis=1) - tp  # is class k, but predicted otherwise

        # The denominators are clipped at one so that classes, which never
        #  occur or are never predicted, get zero instead of a nan.
        out["accuracy"] = tp.sum() / cm.sum()  # ~ P(\hat{y} = y)
        out["precision"] = tp / np.maximum(tp + fp, 1)  # ~ P(y=k \mid \hat{y}=k)
        out["recall"] = tp / np.maximum(tp + fn, 1)  # ~ P(\hat{y}=k \mid y=k)

        return out
Ejemplo n.º 3
0
def example(kind="cplx"):
    r"""An example, illustrating pre-training.

    Builds one model per layer type of the chosen family, trains them one
    after another on a small synthetic regression dataset (`y = -X[:, :n_output]`),
    warm-starting each model from the weights and the binarized masks of
    the previously trained one, and finally prints the test scores, the
    final layer's weights, the masks and the sparsity of every model.

    Parameters
    ----------
    kind : str, default="cplx"
        The family of layers to use: one of "cplx", "real-ard", "real-l0"
        or "real-lasso".

    Raises
    ------
    ValueError
        If `kind` is not one of the supported families.
    """
    from collections import OrderedDict

    # NB both constructors read `n_features` and `n_output` from the
    #  enclosing scope at call time, i.e. after these are set below.
    def construct_real(linear):
        return torch.nn.Sequential(OrderedDict([
            # the body is deliberately left empty: uncomment to add a layer
            ("body", torch.nn.Sequential(OrderedDict([
                # ("linear", linear(n_features, n_features, bias=True)),
                # ("relu", torch.nn.LeakyReLU()),
            ]))),
            ("final", linear(n_features, n_output, bias=False)),
        ]))

    def construct_cplx(linear):
        from cplxmodule.nn import RealToCplx, CplxToReal

        # the complex model halves both dimensions after the
        #  real-to-complex conversion of the input
        return torch.nn.Sequential(OrderedDict([
            ("cplx", RealToCplx()),
            # the body is deliberately left empty: uncomment to add a layer
            #  (`CplxAdaptiveModReLU` is importable from `cplxmodule.nn`)
            ("body", torch.nn.Sequential(OrderedDict([
                # ("linear", linear(n_features // 2, n_features // 2, bias=True)),
                # ("relu", CplxAdaptiveModReLU(n_features // 2)),
            ]))),
            ("final", linear(n_features // 2, n_output // 2, bias=False)),
            ("real", CplxToReal()),
        ]))

    # Each phase is `layer name -> (number of steps, weight of the penalty)`.
    # The chain is kept lazy on purpose: only the layer classes of the
    #  requested family are looked up.
    if kind == "cplx":
        layers = [CplxLinear, CplxLinearARD, CplxLinearMasked]
        construct = construct_cplx
        reduction = "mean"
        phases = {
            "CplxLinear": (1000, 0.0),
            "CplxLinearARD": (4000, 1e-1),
            "CplxLinearMasked": (500, 0.0)
        }

    elif kind == "real-ard":
        layers = [Linear, LinearARD, LinearMasked]
        construct = construct_real
        reduction = "mean"
        phases = {
            "Linear": (1000, 0.0),
            "LinearARD": (4000, 1e-1),
            "LinearMasked": (500, 0.0)
        }

    elif kind == "real-l0":
        layers = [Linear, LinearL0, LinearMasked]
        construct = construct_real
        reduction = "sum"
        phases = {
            "Linear": (1000, 0.0),
            "LinearL0": (4000, 2e-2),
            "LinearMasked": (500, 0.0)
        }

    elif kind == "real-lasso":
        layers = [Linear, LinearLASSO, LinearMasked]
        construct = construct_real
        reduction = "mean"
        phases = {
            "Linear": (1000, 0.0),
            "LinearLASSO": (4000, 1e-1),
            "LinearMasked": (500, 0.0)
        }

    else:
        # fail early and clearly instead of a late NameError on `layers`
        raise ValueError(
            f"Unknown kind {kind!r}: expected one of 'cplx', 'real-ard', "
            f"'real-l0', or 'real-lasso'.")

    device_ = torch.device("cpu")

    # the threshold is the log-odds of tau: p = a / (1 + a), a = p / (1 - p)
    tau = 0.25 if kind == "real-lasso" else 0.73105
    threshold = np.log(tau) - np.log(1 - tau)
    print(f"\n{80*'='}\n{tau:.1%} - {threshold:.3g}")

    n_features = 500 if "cplx" in kind else 250
    n_output = 20 if "cplx" in kind else 10

    # a simple dataset: the target is the negated leading features
    X = torch.randn(10100, n_features)
    y = - X[:, :n_output].clone()
    X, y = X.to(device_), y.to(device_)

    # deliberately tiny train split: the first 100 samples
    train_X, train_y = X[:100], y[:100]
    test_X, test_y = X[100:], y[100:]

    # construct models: the `none` entry marks the absence of a predecessor
    #  for the very first model in the sequence
    models = {"none": None}
    models.update({
        l.__name__: construct(l) for l in layers
    })

    # train a sequence of models, pairing each one with its predecessor
    names, losses = list(models.keys()), {}
    for src, dst in zip(names[:-1], names[1:]):
        print(f">>>>>> {dst}")
        n_steps, klw = phases[dst]

        # load the current model with the last one's weights
        model = models[dst]
        if models[src] is not None:
            # compute the dropout masks and normalize them
            state_dict = models[src].state_dict()
            masks = compute_ard_masks(models[src], hard=False,
                                      threshold=threshold)

            state_dict, masks = binarize_masks(state_dict, masks)

            # deploy old weights onto the new model (non-strict, since the
            #  layer types, and hence their state keys, may differ)
            model.load_state_dict(state_dict, strict=False)

            # conditionally deploy the computed dropout masks
            model = deploy_masks(model, state_dict=masks)

        model.to(device_)

        model, losses[dst] = model_train(train_X, train_y, model,
                                         n_steps=n_steps, threshold=threshold,
                                         klw=klw, reduction=reduction)
    # end for

    # get scores on test
    for key, model in models.items():
        if model is None:
            continue

        print(f"\n>>>>>> {key}")
        model_test(test_X, test_y, model, threshold=threshold)
        print(model.final.weight)
        print([*named_masks(model)])
        print([*named_sparsity(model, hard=True, threshold=threshold)])
Ejemplo n.º 4
0
def example_bilinear(kind="real"):
    r"""An example, illustrating pre-training of bilinear layers.

    Builds one single-layer bilinear model per layer type of the chosen
    family, trains them one after another on a synthetic dataset with a
    quadratic target, warm-starting each model from the weights and the
    binarized masks of the previously trained one, and finally prints the
    test scores, the final layer's weights, the masks and the sparsity of
    every model.

    Parameters
    ----------
    kind : str, default="real"
        The family of layers to use: either "real" or "cplx".

    Raises
    ------
    ValueError
        If `kind` is neither "real" nor "cplx".
    """
    # fail early and clearly instead of a late NameError on `layers`
    if kind not in ("cplx", "real"):
        raise ValueError(
            f"Unknown kind {kind!r}: expected either 'real' or 'cplx'.")

    from cplxmodule.nn import RealToCplx, CplxToReal
    from cplxmodule.cplx import from_real, to_real

    # NB both wrappers read `n_features` from the enclosing scope when
    #  instantiated, i.e. after it is set below.
    class BilinearTest(torch.nn.Module):
        def __init__(self, bilinear):
            super().__init__()
            self.final = bilinear(n_features, n_features, 1, bias=False)

        def forward(self, input):
            # a bilinear form of the input with itself
            return self.final(input, input)

    class CplxBilinearTest(torch.nn.Module):
        def __init__(self, bilinear):
            super().__init__()
            self.cplx = RealToCplx()
            self.final = bilinear(n_features // 2, n_features // 2, 1, bias=False)
            self.real = CplxToReal()

        def forward(self, input):
            z = self.cplx(input)
            return self.real(self.final(z, z))

    device_ = torch.device("cpu")
    reduction = "mean"

    # Each phase is `layer name -> (number of steps, weight of the penalty)`.
    if kind == "cplx":
        layers = [CplxBilinear, CplxBilinearARD, CplxBilinearMasked]
        construct = CplxBilinearTest
        phases = {
            "CplxBilinear": (1000, 0.0),
            "CplxBilinearARD": (10000, 1e-1),
            "CplxBilinearMasked": (500, 0.0)
        }

    else:  # kind == "real", guaranteed by the check above
        layers = [Bilinear, BilinearARD, BilinearMasked]
        construct = BilinearTest
        phases = {
            "Bilinear": (1000, 0.0),
            "BilinearARD": (10000, 1e-1),
            "BilinearMasked": (500, 0.0)
        }

    # the threshold is the log-odds of tau: p = a / (1 + a), a = p / (1 - p)
    tau = 0.73105
    threshold = np.log(tau) - np.log(1 - tau)
    print(f"\n{80*'='}\n{tau:.1%} - {threshold:.3g}")

    n_features, n_output = 50, 10

    # a simple dataset : larger than in linear ARD!
    X = torch.randn(10500, n_features)
    out = X[:, :n_output]
    if "cplx" in kind:
        # the negated mean of `conj(z) * z` over the leading features
        z = from_real(out, copy=False)
        y = - to_real(z.conj * z, flatten=False).mean(dim=-2)

    else:
        # the negated mean square of the leading features
        y = - (out * out).mean(dim=-1, keepdim=True)

    X, y = X.to(device_), y.to(device_)

    train_X, train_y = X[:500], y[:500]
    test_X, test_y = X[500:], y[500:]

    # construct models: the `none` entry marks the absence of a predecessor
    #  for the very first model in the sequence
    models = {"none": None}
    models.update({
        l.__name__: construct(l) for l in layers
    })

    # train a sequence of models, pairing each one with its predecessor
    names, losses = list(models.keys()), {}
    for src, dst in zip(names[:-1], names[1:]):
        print(f">>>>>> {dst}")
        n_steps, klw = phases[dst]

        # load the current model with the last one's weights
        model = models[dst]
        if models[src] is not None:
            # compute the dropout masks and normalize them
            state_dict = models[src].state_dict()
            masks = compute_ard_masks(models[src], hard=False,
                                      threshold=threshold)

            state_dict, masks = binarize_masks(state_dict, masks)

            # deploy old weights onto the new model, and report the keys
            #  that did not match (the loading is non-strict)
            print(model.load_state_dict(state_dict, strict=False))

            # conditionally deploy the computed dropout masks
            model = deploy_masks(model, state_dict=masks)

        model.to(device_)

        model, losses[dst] = model_train(train_X, train_y, model,
                                         n_steps=n_steps, threshold=threshold,
                                         klw=klw, reduction=reduction)
    # end for

    # get scores on test
    for key, model in models.items():
        if model is None:
            continue

        print(f"\n>>>>>> {key}")
        model_test(test_X, test_y, model, threshold=threshold)
        print(model.final.weight)
        print([*named_masks(model)])
        print([*named_sparsity(model, hard=True, threshold=threshold)])
Ejemplo n.º 5
0
        model = models[dst]
        if models[src] is not None:
            # compute the dropout masks and normalize them
            state_dict = models[src].state_dict()
            masks = compute_ard_masks(models[src], hard=False,
                                      threshold=threshold)

            state_dict, masks = binarize_masks(state_dict, masks)

            # deploy old weights onto the new model
            model.load_state_dict(state_dict, strict=False)

            # conditionally deploy the computed dropout masks
            model = deploy_masks(model, state_dict=masks)

        model.to(device_)
        optim = torch.optim.Adam(model.parameters())
        model, losses[dst] = model_fit(
            model, feeds["train"], optim, n_steps=n_steps,
            threshold=threshold, klw=klw, reduction="mean")

    # run tests
    for key, model in models.items():
        if model is None:
            continue

        print(f"\n>>>>>> {key}")
        model_score(model, feeds["test"], threshold=threshold)
        # print([*named_masks(model)])
        print([*named_sparsity(model, hard=True, threshold=threshold)])