Example no. 1
 def forward(self, x, m, s, seq_len, p=None):
     n, _, _ = x.shape
     z = self.encode(x)
     z = z[torch.arange(n), (seq_len - 1).long()]  # encoder output at each sequence's last valid step
     pred = self.decode(x[:, :, 0], z)  # index: 0-time, 1-flux, 2-flux_err
     euc, cos = distances(x, pred)
     if p is None:
         zc = torch.cat((z, euc, cos, m, s), dim=1)
     else:
         zc = torch.cat((z, euc, cos, m, s, p.unsqueeze(-1)), dim=1)
     logits = self.estimation_network(zc)
     gamma = self.softmax(logits)
     phi, mu, cov = compute_params(zc, gamma)
     return pred, zc, logits, phi, mu, cov
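
For context, `compute_params` here estimates Gaussian-mixture parameters from the concatenated features `zc` and the soft assignments `gamma`, in the style of DAGMM's estimation step. Below is a minimal sketch of what such a function typically computes; this is an assumption, and the project's own `compute_params` may differ (e.g. in covariance regularization):

import torch

def compute_params_sketch(z, gamma):
    # z: (N, D) features; gamma: (N, K) soft mixture responsibilities
    sum_gamma = gamma.sum(dim=0)                         # (K,)
    phi = sum_gamma / gamma.size(0)                      # mixture weights (K,)
    mu = (gamma.unsqueeze(-1) * z.unsqueeze(1)).sum(0)   # (K, D)
    mu = mu / sum_gamma.unsqueeze(-1)
    z_c = z.unsqueeze(1) - mu.unsqueeze(0)               # (N, K, D) centered features
    cov = (gamma.unsqueeze(-1).unsqueeze(-1) *
           (z_c.unsqueeze(-1) @ z_c.unsqueeze(-2))).sum(0)
    cov = cov / sum_gamma.unsqueeze(-1).unsqueeze(-1)    # (K, D, D)
    return phi, mu, cov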
Example no. 2
 def prepare_filters(self,
                     filters,
                     ratio=1,
                     neuralscale=False,
                     iteration=None,
                     search=False,
                     descent_idx=14,
                     prune_fname=None,
                     num_classes=100,
                     pruned_filters=None):
     if ratio is not None:
         if neuralscale and iteration != 0:
             if search:  # perform iterative search of params (architecture descent)
                 with open("prune_record/param{}.pk".format(iteration - 1),
                           "rb") as fp:
                     pkl_ld = pickle.load(fp)
                 alpha = pkl_ld["train_alpha"]
                 beta = pkl_ld["train_beta"]
             else:  # list of params (iterative search / architecture descent already done)
                 with open("prune_record/" + prune_fname + ".pk",
                           "rb") as fp:
                     pkl_ld = pickle.load(fp)["param"]
                 alpha = pkl_ld[descent_idx][0]
                 beta = pkl_ld[descent_idx][1]
             total_param = compute_params(
                 [int(cfg * ratio) for cfg in sum(filters, [])],
                 classes=num_classes,
                 model='mobilenetv2',
                 last=True)
             # fallback to uniform scaling
             if np.sum(beta) == 0:
                 cfg_tmp = list(alpha)
                 ratio_ = 1.2
                 cur_param = 0
                 while abs(cur_param - total_param) > 0.1 * total_param:
                     cur_param = compute_params(
                         [int(cfg * ratio_) for cfg in cfg_tmp],
                         classes=num_classes,
                         model='mobilenetv2',
                         last=True)
                     if cur_param < total_param:
                         ratio_ += 0.05
                     else:
                         ratio_ -= 0.05
                 filt_cnt = 0
                 new_config = []
                 for block in filters:
                     block_cfg = []
                     for blk_sz in range(len(block)):
                         filt = int(cfg_tmp[filt_cnt] * ratio_)
                          if filt < 10:  # filter count too low; raise it by a fixed margin
                              block_cfg.append(filt + 10)
                         else:
                             block_cfg.append(filt)
                         filt_cnt += 1
                     new_config.append(block_cfg)
                 filters = new_config
             else:
                 tau = total_param  # initialize tau
                 approx_total_param = 0
                 while abs(approx_total_param - total_param) > int(
                         0.0005 * total_param):
                     approx_filts = []
                     for a, b in zip(alpha, beta):
                         approx_filts.append(a * tau**b)
                     approx_total_param = compute_params(
                         approx_filts,
                         classes=num_classes,
                         model='mobilenetv2',
                         last=True)
                     tau_update = 0
                     for a, b in zip(alpha, beta):
                         tau_update += a * tau**b * b / tau
                     if ((approx_total_param - total_param) *
                             tau_update) > tau:
                         tau *= 0.5
                     else:
                         tau = tau - 1.0 * (
                             (approx_total_param - total_param) *
                             tau_update)
                 filt_cnt = 0
                 new_config = []
                 for idx, block in enumerate(filters):
                     block_cfg = []
                     for blk_sz in range(len(block)):
                         filt = int(alpha[filt_cnt] * tau**beta[filt_cnt])
                          if search:  # only apply the floor during architecture search
                              if filt < 10:  # filter count too low; raise it by a fixed margin
                                  block_cfg.append(filt + 10)
                             else:
                                 block_cfg.append(filt)
                         else:
                             block_cfg.append(filt)
                         filt_cnt += 1
                     new_config.append(block_cfg)
                 print(
                     new_config,
                     "approx parameters: {} total parameters: {}".format(
                         approx_total_param, total_param))
                 filters = new_config
         elif pruned_filters is not None:
             total_param = compute_params(
                 [int(cfg * ratio) for cfg in sum(filters, [])],
                 classes=num_classes,
                 model='mobilenetv2',
                 last=True)
             cfg_tmp = pruned_filters
             ratio_ = 1.2
             cur_param = 0
             while abs(cur_param - total_param) > 0.005 * total_param:
                 cur_param = compute_params(
                     [int(cfg * ratio_) for cfg in cfg_tmp],
                     classes=num_classes,
                     model='mobilenetv2',
                     last=True)
                 if cur_param < total_param:
                     ratio_ += 0.00005
                 else:
                     ratio_ -= 0.00005
             filt_cnt = 0
             new_config = []
             for block in filters:
                 block_cfg = []
                 for blk_sz in range(len(block)):
                     filt = int(cfg_tmp[filt_cnt] * ratio_)
                     block_cfg.append(filt)
                     filt_cnt += 1
                 new_config.append(block_cfg)
             filters = new_config
             print("pruned uniform", new_config, "cur_params:", cur_param,
                   "total_params:", total_param)
         else:  # uniform scale
             new_config = []
             for idx, block in enumerate(filters):
                 block_cfg = []
                 for blk_sz in range(len(block)):
                     block_cfg.append(int(block[blk_sz] * ratio))
                 new_config.append(block_cfg)
             filters = new_config
             print(filters)
     return filters
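
The inner `tau` loop above is gradient descent on f(tau) = 0.5 * (P(tau) - P_target)**2, where P(tau) = sum_l alpha_l * tau**beta_l models the total parameter count and `tau_update` accumulates dP/dtau = sum_l alpha_l * beta_l * tau**(beta_l - 1); halving tau when the step exceeds it keeps tau positive. A toy, self-contained version of the same update (alpha/beta and the budget are made up, and P(tau) is evaluated directly instead of through compute_params):

alpha = [0.4, 0.7]   # hypothetical per-layer coefficients
beta = [0.95, 0.90]  # hypothetical per-layer exponents
target = 5000.0      # hypothetical parameter budget

tau = target  # same initialization as above
for _ in range(100):
    approx = sum(a * tau**b for a, b in zip(alpha, beta))          # P(tau)
    grad = sum(a * tau**b * b / tau for a, b in zip(alpha, beta))  # dP/dtau
    step = (approx - target) * grad
    tau = tau * 0.5 if step > tau else tau - step  # halve to keep tau positive
print(round(tau), round(sum(a * tau**b for a, b in zip(alpha, beta))))  # P(tau) -> target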
Example no. 3
# #####
# EARLY
# #####
with open('prune_record/' + args.prune_fname + '_0.csv', newline='') as f:
    reader = csv.reader(f, delimiter=',')
    filters = []
    for row in reader:
        filters.append(list(map(int, row)))
filters = np.array(filters)

# Compute total parameters
total_params = []
for filt in filters:  # over all iterations
    total_params.append(
        compute_params(filt, classes=num_classes, model=args.model))
total_params = np.array(total_params)

ln_k = np.log(total_params)
A = np.stack((ln_k, np.ones(ln_k.shape)), axis=1)
b = np.log(filters)
x = np.matmul(
    np.matmul(np.linalg.inv(np.matmul(A.transpose(), A)), A.transpose()), b)
beta = x[0, :]
alpha = np.exp(x[1, :])
filt = np.array([total_params**b for b in beta])
filt = np.multiply(filt.transpose(), alpha).transpose()

print('early')
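
The matrix algebra above is the normal-equations solution of a per-layer log-linear fit, ln(filters) = beta * ln(total_params) + ln(alpha), with one (alpha, beta) column per layer. As a sanity check, the same coefficients fall out of numpy's least-squares solver on synthetic data (all values below are made up, single-layer case for brevity):

import numpy as np

rng = np.random.default_rng(0)
params = np.array([1e6, 2e6, 4e6, 8e6])  # stand-in total parameter counts
true_alpha, true_beta = 3.0, 0.4
filt = true_alpha * params**true_beta * rng.lognormal(0.0, 0.01, size=4)

A = np.stack((np.log(params), np.ones(4)), axis=1)
x, *_ = np.linalg.lstsq(A, np.log(filt), rcond=None)
beta_hat, alpha_hat = x[0], np.exp(x[1])
print(beta_hat, alpha_hat)  # ~0.4, ~3.0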
Example no. 4
    def prepare_filters(self,
                        config,
                        ratio=1,
                        neuralscale=False,
                        iteration=None,
                        search=False,
                        descent_idx=14,
                        prune_fname=None,
                        num_classes=100,
                        pruned_filters=None):
        if ratio is not None:  # for ratio sweeping
            if neuralscale and iteration != 0:  # use proposed efficient scaling method
                if search:  # perform iterative search of params (architecture descent)
                    with open("prune_record/param{}.pk".format(iteration - 1),
                              "rb") as fp:
                        pkl_ld = pickle.load(fp)
                    alpha = pkl_ld["train_alpha"]
                    beta = pkl_ld["train_beta"]
                else:  # list of params (iterative search / architecture descent already done)
                    with open("prune_record/" + prune_fname + ".pk",
                              "rb") as fp:
                        pkl_ld = pickle.load(fp)["param"]
                    alpha = pkl_ld[descent_idx][0]
                    beta = pkl_ld[descent_idx][1]

                # total_param = compute_params_(vgg11(config=self.convert_filters(self, template=config, filter_list=[int(cfg*ratio) for cfg in list(filter(lambda a: a != 'M', config))]), num_classes=num_classes) )
                total_param = compute_params([
                    cfg * ratio
                    for cfg in list(filter(lambda a: a != 'M', config))
                ],
                                             classes=num_classes,
                                             model='vgg')
                tau = total_param  # initialize tau
                for j in range(2000):
                    approx_filts = []
                    for a, b in zip(alpha, beta):
                        approx_filts.append(int(a * tau**b))
                    # approx_total_param = compute_params(vgg11(config=self.convert_filters(self, template=config, filter_list=approx_filts), num_classes=num_classes) )
                    approx_total_param = compute_params(approx_filts,
                                                        classes=num_classes,
                                                        model='vgg')
                    tau_update = 0
                    for a, b in zip(alpha, beta):
                        tau_update += a * tau**b * b / tau
                    tau = tau - 50.0 * (
                        (approx_total_param - total_param) * tau_update)
                new_config = []
                cfg_cnt = 0
                for i in range(len(config)):
                    if config[i] != 'M':
                        new_config.append(
                            int(alpha[cfg_cnt] * tau**beta[cfg_cnt]))
                        cfg_cnt += 1
                    else:
                        new_config.append(config[i])  # M
                print(
                    new_config, "approx params: {} total params: {}".format(
                        approx_total_param, total_param))
            elif pruned_filters is not None:
                total_param = compute_params([
                    cfg * ratio
                    for cfg in list(filter(lambda a: a != 'M', config))
                ],
                                             classes=num_classes,
                                             model='vgg')
                cfg_tmp = pruned_filters
                ratio_ = 1.2
                cur_param = 0
                while abs(cur_param - total_param) > 0.005 * total_param:
                    cur_param = compute_params(
                        [int(cfg * ratio_) for cfg in cfg_tmp],
                        classes=num_classes,
                        model='vgg')
                    if cur_param < total_param:
                        ratio_ += 0.00005
                    else:
                        ratio_ -= 0.00005
                filt_cnt = 0
                new_config = []
                for i in range(len(config)):
                    if config[i] != 'M':
                        new_config.append(int(cfg_tmp[filt_cnt] * ratio_))
                        filt_cnt += 1
                    else:
                        new_config.append(config[i])  # M
                print("pruned uniform", new_config, "cur_params:", cur_param,
                      "total_params:", total_param)
            else:  # uniform scaling
                new_config = []
                for i in range(len(config)):
                    if config[i] != 'M':
                        new_config.append(int(config[i] * ratio))
                    else:
                        new_config.append(config[i])  # M
        else:
            new_config = config
        return new_config
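
All three branches above repeat the same splice: scaled filter counts are merged back into a VGG-style config while the 'M' (max-pool) markers are kept in place. A small hypothetical helper capturing that pattern:

def merge_into_config(config, scaled_filters):
    # Walk the template config, consuming one scaled count per conv entry.
    out, i = [], 0
    for entry in config:
        if entry == 'M':
            out.append('M')  # max-pool marker passes through untouched
        else:
            out.append(int(scaled_filters[i]))
            i += 1
    return out

print(merge_into_config([64, 'M', 128, 'M'], [32.0, 77.9]))  # [32, 'M', 77, 'M']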
Example no. 5
    y_pred = np.argmax(y_prob_val, axis=1)
    accuracy = (y_val == y_pred).sum() / len(y_val)
    cm = confusion_matrix(y_val, y_pred)

    plot_confusion_matrix(cm, idx2lab, args.name, "{}/{}_cm_norm.png".format(fig_path, args.arch), normalize=True)
    plot_confusion_matrix(cm, idx2lab, "{}, accuracy: {:.4f}".format(args.name, accuracy), "{}/{}_cm.png".format(fig_path, args.arch), normalize=False)

softmax = torch.nn.Softmax(dim=1)

if args.name in ("asas_sn", "toy"):
    z_val = torch.tensor(val_features, dtype=torch.float, device=args.d)
    z_test = torch.tensor(test_features, dtype=torch.float, device=args.d)
    logits_val = torch.tensor(val_logits, dtype=torch.float, device=args.d)
    logits_test = torch.tensor(test_logits, dtype=torch.float, device=args.d)
    
    phi_val, mu_val, cov_val = compute_params(z_val, softmax(logits_val))
    # phi_test, mu_test, cov_test = compute_params(z_test, softmax(logits_test))
    
    val_energy = compute_energy(z_val, phi=phi_val, mu=mu_val, cov=cov_val, size_average=False).cpu().numpy()
    test_energy = compute_energy(z_test, phi=phi_val, mu=mu_val, cov=cov_val, size_average=False).cpu().numpy()

    labels = np.ones(len(y_test))
    if args.name == "asas_sn":
        labels[y_test == 8] = 0  # class 8 is outlier
    elif args.name == "toy":
        labels[y_test == 3] = 0
        labels[y_test == 4] = 0
    scores_in = test_energy[labels == 1]
    scores_out = test_energy[labels == 0]
    average_precision = (labels == 0).sum() / len(labels)  # baseline AP: outlier prevalence
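
The last line is the outlier fraction, i.e. the average precision a random scorer would achieve, kept as a baseline. An actual evaluation of the energy scores could look like the sketch below (an assumption consistent with DAGMM: higher energy means more anomalous, so the outlier class labels == 0 is scored directly):

from sklearn.metrics import average_precision_score, roc_auc_score

auroc = roc_auc_score(labels == 0, test_energy)
auprc = average_precision_score(labels == 0, test_energy)
print("AUROC: {:.4f}  AUPRC: {:.4f}".format(auroc, auprc))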
Example no. 6
    def prune_neurons(self, optimizer):
        if self.dataset == "CIFAR10":
            num_classes = 10
        elif self.dataset == "CIFAR100":
            num_classes = 100
        elif self.dataset == "tinyimagenet":
            num_classes = 200
        else:
            # guard against num_classes being used uninitialized below
            raise ValueError("Unsupported dataset: {}".format(self.dataset))

        # set number of pruned neurons to be a certain percentage
        all_neuron_units, neuron_units = self._count_number_of_neurons()
        filters = self.compute_filter_number()
        targeted_filter = [filt * self.size for filt in filters]
        targeted_params = compute_params(targeted_filter,
                                         classes=num_classes,
                                         model=self.model_name)
        cur_params = compute_params(filters,
                                    classes=num_classes,
                                    model=self.model_name)
        print("Before: ", filters)
        ratio = 0.9
        # while abs(cur_params - targeted_params) > int(targeted_params*0.0005):
        while targeted_params < cur_params:
            if self.method == 0:  # network slimming
                all_criteria = torch.tensor([
                    abs(criterion) for layer_criteria in self.prune_criteria
                    for criterion in layer_criteria[0]
                ]).cuda()
                prune_neurons_now = self.pruned_neurons + self.prune_per_iteration
                threshold_now = torch.sort(all_criteria)[0][prune_neurons_now]
                for layer, layer_criteria in enumerate(self.prune_criteria):
                    for unit, criterion in enumerate(layer_criteria[0]):
                        if abs(criterion) <= threshold_now:
                            # do actual pruning
                            self.pruning_gates[layer][unit] *= 0.0
                            self.parameters[layer].data[unit] *= 0.0
                            self.prune_criteria[layer][0].data[
                                unit] *= 0.0  # weight
                            self.prune_criteria[layer][1].data[
                                unit] *= 0.0  # bias (not important)

                # count number of neurons
                all_neuron_units, neuron_units = self._count_number_of_neurons(
                )
                self.pruned_neurons = all_neuron_units - neuron_units
                cur_filter = self.compute_filter_number()
                cur_params = compute_params(cur_filter,
                                            classes=num_classes,
                                            model=self.model_name)
                print(cur_params, cur_filter)
            elif self.method == 1:  # uniformly pruned across all layers
                cur_filter = [int(filt * ratio) for filt in filters]
                cur_params = compute_params(cur_filter,
                                            classes=num_classes,
                                            model=self.model_name)
                ratio *= 0.999
            else:
                print("No such method")
                exit()

        if self.method == 1:  # prune by weight magnitude down to the uniform targets
            for layer, target_filt in enumerate(cur_filter):
                layer_criteria = np.asarray([
                    torch.norm(filt, 1).data.cpu().item()
                    for filt in self.prune_criteria[layer]
                ]).reshape(-1)

                # adaptively estimate threshold given a number of neurons to be removed
                threshold_now = np.sort(layer_criteria)[::-1][target_filt]

                for unit, criterion in enumerate(layer_criteria):
                    if abs(criterion) <= threshold_now:
                        # do actual pruning
                        self.pruning_gates[layer][unit] *= 0.0
                        self.parameters[layer].data[unit] *= 0.0
                        self.prune_criteria[layer].data[unit, :] *= 0.0
            cur_filter = self.compute_filter_number()

        # Set momentum buffer to 0
        for layer in range(len(self.pruning_gates)):
            for unit in range(len(self.pruning_gates[layer])):
                if self.pruning_gates[layer][unit]:
                    continue
                if 'momentum_buffer' in optimizer.state[
                        self.parameters[layer]].keys():
                    optimizer.state[
                        self.parameters[layer]]['momentum_buffer'][unit] *= 0.0
        print("After: ", cur_filter)
        print("Target Params:", targeted_params, " Approx. Param:", cur_params)
Example no. 7
def fit_params(iteration=None,
               prune_fname="filename",
               classes=10,
               model='vgg',
               in_channel=3,
               kernel=3):
    if iteration is None:
        fname = 'prune_record/train.csv'
    else:
        fname = 'prune_record/' + prune_fname + '_{}.csv'.format(iteration)
    # The file is closed on every path, including the early return below.
    with open(fname, newline='') as f:
        reader = csv.reader(f, delimiter=',')
        filters = []
        for row in reader:
            filters.append(list(map(int, row)))

    filters = np.array(filters, dtype=int)
    # Too few samples for a good interpolation; keep only the first row
    if filters.shape[0] < 6:
        filters = np.expand_dims(filters[0], axis=0)
    # not all layers pruned at least once; opt for uniform scaling
    if filters.shape[0] == 1:
        alpha = filters[0]
        beta = np.zeros(filters.shape[1])
        # =======================
        # save scaling parameters
        # =======================
        pickle_save = {
            "train_alpha": alpha,
            "train_beta": beta,
        }
        if iteration is not None:
            out_fname = "prune_record/param{}.pk".format(iteration)
        else:
            out_fname = "prune_record/param.pk"
        with open(out_fname, "wb") as pickle_out:
            pickle.dump(pickle_save, pickle_out)

        return alpha, beta
    # Compute total parameters
    total_params = []
    for filt in filters:  # over all iterations
        total_params.append(
            compute_params(filt,
                           classes=classes,
                           model=model,
                           in_channel=in_channel,
                           kernel=kernel,
                           last=True))
    total_params = np.array(total_params)

    # ######
    # LR (simple)
    # ######
    ln_tau = np.log(total_params)
    Tau = np.stack((ln_tau, np.ones(ln_tau.shape)), axis=1)
    Phi = np.log(filters)
    Theta = np.matmul(
        np.matmul(np.linalg.inv(np.matmul(Tau.transpose(), Tau)),
                  Tau.transpose()), Phi)
    beta = Theta[0, :]
    alpha = np.exp(Theta[1, :])

    # =======================
    # save scaling parameters
    # =======================
    pickle_save = {
        "train_alpha": alpha,
        "train_beta": beta,
    }

    if iteration is not None:
        out_fname = "prune_record/param{}.pk".format(iteration)
    else:
        out_fname = "prune_record/param.pk"
    with open(out_fname, "wb") as pickle_out:
        pickle.dump(pickle_save, pickle_out)

    return alpha, beta
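
Downstream (see prepare_filters in Examples no. 2 and 4), each saved (alpha, beta) pair turns a parameter budget tau into a per-layer filter count via the power law alpha * tau**beta. A toy illustration with made-up coefficients:

import numpy as np

alpha = np.array([12.0, 25.0, 40.0])  # hypothetical fitted coefficients
beta = np.array([0.30, 0.28, 0.25])   # hypothetical fitted exponents
tau = 1e6                             # hypothetical parameter budget
print((alpha * tau**beta).astype(int))  # per-layer filter counts at this budget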