Ejemplo n.º 1
0
def gradient(model, feats_dir, steps, **kwargs):
    layers = [layer for layer in utils.get_layers(model) if "conv" in layer]

    empirical = {layer: {} for layer in layers}
    for i in tqdm(range(len(steps))):
        step = steps[i]
        if step == 0:
            continue
        weight_buffers = utils.load_features(
            steps=[str(step)],
            feats_dir=feats_dir,
            model=model,
            suffix="weight.grad_norm_buffer",
            group="buffers",
        )
        bias_buffers = utils.load_features(
            steps=[str(step)],
            feats_dir=feats_dir,
            model=model,
            suffix="bias.grad_norm_buffer",
            group="buffers",
        )
        for layer in layers:
            wl_t = weight_buffers[layer][f"step_{step}"]
            bl_t = bias_buffers[layer][f"step_{step}"]
            empirical[layer][step] = utils.in_synapses(wl_t, bl_t)

    return {"empirical": empirical}
Ejemplo n.º 2
0
def scale_momentum(model, feats_dir, steps, **kwargs):
    lr = kwargs.get("lr")
    wd = kwargs.get("wd")
    momentum = kwargs.get("momentum")
    dampening = kwargs.get("dampening")

    lr = np.array(lr, dtype=np.float128)
    wd = np.array(wd, dtype=np.float128)
    momentum = np.array(momentum, dtype=np.float128)
    dampening = np.array(dampening, dtype=np.float128)

    denom = lr * (1 - dampening) * (1 + momentum)
    gamma = (1 - momentum) / denom
    omega = np.sqrt(4 * wd / denom)

    layers = [layer for layer in utils.get_layers(model) if "conv" in layer]
    W_0 = utils.load_features(
        steps=[str(steps[0])],
        feats_dir=feats_dir,
        model=model,
        suffix="weight",
        group="params",
    )
    b_0 = utils.load_features(
        steps=[str(steps[0])],
        feats_dir=feats_dir,
        model=model,
        suffix="bias",
        group="params",
    )

    load_kwargs = {
        "model": model,
        "feats_dir": feats_dir,
    }
    theory_kwargs = {
        "lr": lr,
        "wd": wd,
        "momentum": momentum,
        "dampening": dampening,
        "gamma": gamma,
        "omega": omega,
        "W_0": W_0,
        "b_0": b_0,
        "step_0": steps[0],
    }

    theoretical = {layer: {} for layer in layers}
    empirical = {layer: {} for layer in layers}
    for i in tqdm(range(len(steps))):
        step = steps[i]
        theory_kwargs["i"] = i
        load_kwargs["group"] = "buffers"
        compute_theoretical_momentum(
            step, layers, load_kwargs, theoretical, **theory_kwargs,
        )
        load_kwargs["group"] = "params"
        compute_empirical(step, layers, load_kwargs, empirical)

    return {"empirical": empirical, "theoretical": theoretical}
Ejemplo n.º 3
0
def weights_grads_full(model, feats_dir, steps, **kwargs):
    lr = kwargs.get("lr")
    wd = kwargs.get("wd")

    layers = [layer for layer in utils.get_layers(model)]# if "conv" in layer]
    load_kwargs = {
        "model": model,
        "feats_dir": feats_dir,
    }

    weights_and_grads = {layer: {"weight":[],"grad":[]} for layer in layers}
    steps = np.unique(steps)
    steps.sort()
    for i in tqdm(range(1, len(steps))):
        step = steps[i]
        extract_weights_and_grads(step, layers, load_kwargs, weights_and_grads, **kwargs)

    print("Allocating numpy arrays")
    all_weights = []
    all_grads = []
    for layer in layers:
        all_weights.append(np.array(weights_and_grads[layer]["weight"]))
        all_grads.append(np.array(weights_and_grads[layer]["grad"]))

    all_weights_and_grads = {
        "weights": np.array(all_weights),
        "grads": np.array(all_grads),
        "steps": steps[1:],
    }

    return all_weights_and_grads
Ejemplo n.º 4
0
def translation(model, feats_dir, steps, **kwargs):
    lr = kwargs.get("lr")
    wd = kwargs.get("wd")

    layers = [
        layer for layer in utils.get_layers(model) if "classifier" in layer
    ]
    W_0 = utils.load_features(
        steps=[str(steps[0])],
        feats_dir=feats_dir,
        model=model,
        suffix="weight",
        group="params",
    )
    b_0 = utils.load_features(
        steps=[str(steps[0])],
        feats_dir=feats_dir,
        model=model,
        suffix="bias",
        group="params",
    )

    load_kwargs = {
        "model": model,
        "feats_dir": feats_dir,
    }
    theory_kwargs = {
        "lr": lr,
        "wd": wd,
        "W_0": W_0,
        "b_0": b_0,
        "step_0": steps[0],
    }

    theoretical = {layer: {} for layer in layers}
    empirical = {layer: {} for layer in layers}
    for i in tqdm(range(len(steps))):
        step = steps[i]
        theory_kwargs["i"] = i
        load_kwargs["group"] = "buffers"
        compute_theoretical(step, layers, load_kwargs, theoretical,
                            **theory_kwargs)
        load_kwargs["group"] = "params"
        compute_empirical(step, layers, load_kwargs, empirical)

    return {"empirical": empirical, "theoretical": theoretical}
Ejemplo n.º 5
0
def phase(model, feats_dir, steps, **kwargs):
    lr = kwargs.get("lr")
    wd = kwargs.get("wd")

    layers = [layer for layer in utils.get_layers(model) if "conv" in layer]

    load_kwargs = {
        "model": model,
        "feats_dir": feats_dir,
    }

    position = {layer: {} for layer in layers}
    velocity = {layer: {} for layer in layers}
    for i in tqdm(range(1, len(steps))):
        step = steps[i]
        compute_pos_vel(step, layers, load_kwargs, position, velocity, **kwargs)

    return {"position": position, "velocity": velocity}
Ejemplo n.º 6
0
def network(model, feats_dir, steps, **kwargs):
    subset = kwargs.get("subset", None)
    seed = kwargs.get("seed", 0)
    layers = [layer for layer in utils.get_layers(model)]
    empirical = {layer: {} for layer in layers}
    for i in range(len(steps)):
        step = steps[i]
        weights = utils.load_features(
            steps=[str(step)],
            feats_dir=feats_dir,
            model=model,
            suffix="weight",
            group="params",
        )
        biases = utils.load_features(
            steps=[str(step)],
            feats_dir=feats_dir,
            model=model,
            suffix="bias",
            group="params",
        )
        np.random.seed(seed)
        for layer in layers:
            Wl_t = weights[layer][f"step_{step}"]
            bl_t = biases[layer][f"step_{step}"]
            all_weights = np.concatenate((Wl_t.reshape(-1), bl_t.reshape(-1)))
            if subset is None:
                random_subset_idx = np.arange(len(all_weights))
            else:
                random_subset_idx = np.random.choice(len(all_weights),
                                                     size=min(
                                                         subset,
                                                         len(all_weights)),
                                                     replace=False)
            empirical[layer][step] = all_weights[random_subset_idx]

    return {"empirical": empirical}