Example 1
    def _bounded_data2param(self, data, lb=0., ub=1.):
        data = enforce_float_tensor(data)
        lb = npt.tensor(lb, min_ndim=0)
        ub = npt.tensor(ub, min_ndim=0)
        if lb is None and ub is None:  # Unbounded
            return data
        else:
            if lb is None:
                lb = -np.inf
            if ub is None:
                ub = np.inf
            data = torch.clamp(
                data, min=float(lb + self.epsilon),
                max=float(ub - self.epsilon)
            )

            if lb == -np.inf:
                # data[data > ub - self.epsilon] = ub - self.epsilon
                return torch.log(ub - data)
            elif ub == np.inf:
                # data[data < lb + self.epsilon] = lb + self.epsilon
                return torch.log(data - lb)
            else:
                # data[data < lb + self.epsilon] = lb + self.epsilon
                # data[data > ub - self.epsilon] = ub - self.epsilon
                p = (data - lb) / (ub - lb)
                return torch.log(p) - torch.log(1. - p)
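Below is a minimal sketch of what _bounded_data2param computes for a doubly-bounded value, written in plain PyTorch; npt.tensor and enforce_float_tensor are project helpers assumed here to be thin tensor-conversion wrappers. Data are clamped away from the bounds by epsilon, rescaled to (0, 1), and passed through the logit so the stored parameter is unconstrained.

import torch

def data2param_sketch(data, lb=0., ub=1., epsilon=1e-6):
    # Clamp away from the bounds, rescale to (0, 1), then take the logit.
    data = torch.clamp(data, min=lb + epsilon, max=ub - epsilon)
    p = (data - lb) / (ub - lb)
    return torch.log(p) - torch.log(1. - p)

# Values near the bounds map to large-magnitude unconstrained parameters.
x = torch.tensor([0.01, 0.5, 0.99])
print(data2param_sketch(x))  # roughly [-4.6, 0.0, 4.6]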
Example 2
 def register_bounded_parameter(self, name, data, lb=0., ub=1.):
     lb = npt.tensor(lb, min_ndim=0)
     ub = npt.tensor(ub, min_ndim=0)
     data = enforce_float_tensor(data)
     self._params_bounded[name] = {'lb':lb, 'ub':ub}
     param = self._bounded_data2param(data, lb, ub)
     self.register_parameter('_bounded_' + name,
                             nn.Parameter(param, requires_grad=True))
     self.params_bounded.__dict__[name] = None # just a reminder
Example 3
 def __init__(self, data, lb=0., ub=1., skip_loading_lbub=False,
              requires_grad=True, **kwargs):
     super().__init__(**kwargs)
     self.lb = npt.tensor(lb)
     self.ub = npt.tensor(ub)
     self.skip_loading_lbub = skip_loading_lbub
     self._param = nn.Parameter(self.data2param(data),
                                requires_grad=requires_grad)
     if self._param.ndim == 0:
         warnings.warn('Use ndim>0 to allow consistent use of [:]. '
                       'If ndim=0, use paramname.v to access the '
                       'value.')
Example 4
 def _bounded_param2data(self, param, lb=0., ub=1.):
     lb = npt.tensor(lb, min_ndim=0)
     ub = npt.tensor(ub, min_ndim=0)
     param = enforce_float_tensor(param)
     if lb is None and ub is None:  # Unbounded
         return param
     elif lb is None:
         return ub - torch.exp(param)
     elif ub is None:
         return lb + torch.exp(param)
     else:
         return (1. / (1. + torch.exp(-param))) * (ub - lb) + lb
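The inverse mapping in _bounded_param2data is a scaled logistic sigmoid. A quick round-trip check in plain PyTorch, under the same assumptions as the sketch after Example 1:

import torch

def param2data_sketch(param, lb=0., ub=1.):
    # The sigmoid maps the unconstrained parameter back into (lb, ub).
    return torch.sigmoid(param) * (ub - lb) + lb

x = torch.tensor([0.2, 0.5, 0.8])
param = torch.log(x) - torch.log(1. - x)  # data -> param for lb=0, ub=1
print(torch.allclose(param2data_sketch(param), x))  # True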
Example 5
    def get_named_bounded_params(
        self,
        named_bounded_params: Dict[str, BoundedParameter] = None,
        exclude: Iterable[str] = ()
    ) -> (Iterable[str], np.ndarray, np.ndarray, np.ndarray, np.ndarray,
          np.ndarray):
        """
        :param named_bounded_params:
        :param exclude:
        :return: names, v, grad, lb, ub, requires_grad
        """
        if named_bounded_params is None:
            d = odict([
                (k, v) for k, v in self.named_modules()
                if isinstance(v, OverriddenParameter) and k not in exclude
            ])
        else:
            d = named_bounded_params
        names = []
        v = []
        lb = []
        ub = []
        grad = []
        requires_grad = []
        for name, param in d.items():
            v0 = param.v.flatten()
            if param._param.grad is None:
                g0 = torch.zeros_like(v0)
            else:
                g0 = param._param.grad.flatten()
            l0 = npt.tensor(param.lb).expand_as(param.v).flatten()
            u0 = npt.tensor(param.ub).expand_as(param.v).flatten()

            for i, (v1, g1, l1, u1) in enumerate(zip(v0, g0, l0, u0)):
                v.append(npy(v1))
                grad.append(npy(g1))
                lb.append(npy(l1))
                ub.append(npy(u1))
                requires_grad.append(npy(param._param.requires_grad))
                if v0.numel() > 1:
                    names.append(name + '%d' % i)
                else:
                    names.append(name)
        v = np.stack(v)
        lb = np.stack(lb)
        ub = np.stack(ub)
        grad = -np.stack(grad)  # minimizing; so take negative
        requires_grad = np.stack(requires_grad)
        return names, v, grad, lb, ub, requires_grad
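A hypothetical usage sketch (model stands in for a module with this method and is not defined in this excerpt), showing how the six returned arrays line up per flattened parameter element; note that grad is already negated for minimization:

names, v, grad, lb, ub, requires_grad = model.get_named_bounded_params()
for row in zip(names, v, lb, ub, grad, requires_grad):
    print('%-24s v=%9.3g in [%g, %g]  neg_grad=%9.3g  trainable=%s' % row)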
Example 6
    def data2param(self, data) -> torch.Tensor:
        lb = self.lb
        ub = self.ub
        data = enforce_float_tensor(data)
        if lb is None and ub is None:  # Unbounded
            return data
        elif lb is None:
            data[data > ub - self.epsilon] = ub - self.epsilon
            return torch.log(ub - data)
        elif ub is None:
            data[data < lb + self.epsilon] = lb + self.epsilon
            return torch.log(data - lb)
        elif npt.tensor(lb == ub).all():
            return torch.zeros_like(data)
        else:
            too_small = data < lb + self.epsilon
            try:
                data[too_small] = lb + self.epsilon
            except RuntimeError:
                data[too_small] = (lb + self.epsilon)[too_small]

            too_big = data > ub - self.epsilon
            try:
                data[too_big] = ub - self.epsilon
            except RuntimeError:
                data[too_big] = (ub - self.epsilon)[too_big]
            p = (data - lb) / (ub - lb)
            return torch.log(p) - torch.log(1. - p)
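Compared with Example 1, data2param also handles per-element (tensor-valued) bounds, which is what the RuntimeError fallbacks above are for, as well as the degenerate case lb == ub. A small self-contained sketch of the per-element path, assuming bounds that broadcast to the data's shape:

import torch

data = torch.tensor([0.0, 2.0, 9.0])
lb = torch.tensor([0.0, 0.0, 5.0])
ub = torch.tensor([1.0, 5.0, 10.0])
eps = 1e-6

# Clamp each element into its own (lb, ub) interval, then take the logit.
data = torch.max(torch.min(data, ub - eps), lb + eps)
p = (data - lb) / (ub - lb)
print(torch.log(p) - torch.log(1. - p))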
Example 7
 def _load_from_state_dict(
         self, state_dict, prefix, local_metadata, strict,
         missing_keys, unexpected_keys, error_msgs):
     lb_name = prefix + '_lb'
     ub_name = prefix + '_ub'
     param_name = prefix + '_param'
     data_name = prefix + '_data'
     if self.skip_loading_lbub:
         if lb_name in state_dict:
             state_dict.pop(lb_name)
         if ub_name in state_dict:
             state_dict.pop(ub_name)
     else:
         if lb_name in state_dict:
             self.lb = npt.tensor(state_dict.pop(lb_name))
         if ub_name in state_dict:
             self.ub = npt.tensor(state_dict.pop(ub_name))
     if data_name in state_dict:
         state_dict[param_name] = self.data2param(
             state_dict.pop(data_name).detach().clone())
     return super()._load_from_state_dict(
         state_dict, prefix, local_metadata, strict,
         missing_keys, unexpected_keys, error_msgs)
Example 8
 def param2data(self, param):
     lb = self.lb
     ub = self.ub
     param = enforce_float_tensor(param)
     if lb is None and ub is None: # Unbounded
         return param
     elif lb is None:
         return torch.tensor(ub) - torch.exp(param)
     elif ub is None:
         return lb + torch.exp(param)
     elif npt.tensor(lb == ub).all():
         return torch.zeros_like(param) + lb
     else:
         return (1 / (1 + torch.exp(-param))) * (ub - lb) + lb  # noqa
Example 9
    def dat2p_dat(
        self, ch_tr_dim: np.ndarray, dur_tr: np.ndarray, ev_tr_dim: np.ndarray
    ) -> (torch.Tensor, torch.Tensor, np.ndarray, np.ndarray, torch.Tensor,
          torch.Tensor):
        """
        :param ch_tr_dim: [tr, dim]
        :param dur_tr: [tr]
        :param ev_tr_dim: [tr, dim]
        :return: n_cond_dur_ch[cond, dur, ch],
        ev_cond_fr_dim_meanvar[dcond, fr, dim, (mean, var)],
        ev_cond_dim[dcond, dim], dcond_tr[tr],
        durs[dur], ddur_tr[tr]
        """
        nt0 = self.nt0
        dt0 = self.dt0
        n_ch_flat = self.n_ch
        subsample_factor = self.subsample_factor

        nt = int(nt0 // subsample_factor)

        durs, ddur_tr = np.unique(dur_tr, return_inverse=True)
        ddur_tr = ddur_tr.astype(np.int64)  # np.int was removed in NumPy 1.24
        n_dur = len(durs)
        durs = torch.tensor(durs)
        ddur_tr = torch.tensor(ddur_tr, dtype=torch.long)

        ch_tr_flat = consts.ch_by_dim2ch_flat(ch_tr_dim)

        ev_cond_dim, dcond_tr = np.unique(ev_tr_dim,
                                          return_inverse=True,
                                          axis=0)
        n_cond_flat = len(ev_cond_dim)
        ev_cond_fr_dim = torch.tensor(ev_cond_dim)[:, None, :].expand(
            [-1, nt, -1])

        ev_cond_fr_dim_meanvar = torch.stack(
            [ev_cond_fr_dim, torch.zeros_like(ev_cond_fr_dim)], -1)

        n_cond_dur_ch = npt.tensor(
            npg.aggregate(np.stack([dcond_tr,
                                    npy(ddur_tr), ch_tr_flat]), 1., 'sum',
                          [n_cond_flat, n_dur, n_ch_flat]))

        return n_cond_dur_ch, ev_cond_fr_dim_meanvar, ev_cond_dim, dcond_tr, \
            durs, ddur_tr
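The counting step in dat2p_dat can be illustrated without the project helpers: np.unique(..., return_inverse=True) assigns each trial an integer condition (and duration) index, and trial counts are accumulated into a [cond, dur, ch] table. The sketch below uses np.add.at instead of numpy_groupies' npg.aggregate, which the original calls; the toy arrays are made up for illustration.

import numpy as np

ev_tr_dim = np.array([[0.1], [0.3], [0.1], [0.3]])  # [tr, dim]
dur_tr = np.array([0.4, 0.4, 0.8, 0.8])             # [tr]
ch_tr = np.array([0, 1, 1, 0])                      # [tr], flattened choice

ev_cond_dim, dcond_tr = np.unique(ev_tr_dim, return_inverse=True, axis=0)
dcond_tr = dcond_tr.ravel()  # guard against NumPy versions returning a 2-D inverse
durs, ddur_tr = np.unique(dur_tr, return_inverse=True)

n_cond_dur_ch = np.zeros([len(ev_cond_dim), len(durs), 2])
np.add.at(n_cond_dur_ch, (dcond_tr, ddur_tr, ch_tr), 1.)
print(n_cond_dur_ch.shape)  # (2, 2, 2): trial counts per [cond, dur, ch]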
Example 10
def optimize(
        model: ModelType,
        fun_data: FunDataType,
        fun_loss: FunLossType,
        plotfuns: PlotFunsType,
        optimizer_kind='Adam',
        max_epoch=100,
        patience=20,  # How many epochs to wait before quitting
        thres_patience=0.001,  # How much it should improve within patience
        learning_rate=.5,
        reduce_lr_by=0.5,
        reduced_lr_on_epoch=0,
        reduce_lr_after=50,
        reset_lr_after=100,
        to_plot_progress=True,
        show_progress_every=5,  # number of epochs
        to_print_grad=True,
        n_fold_valid=1,
        epoch_to_check=None,  # CHECKED
        comment='',
        **kwargs  # to ignore unnecessary kwargs
) -> (float, dict, dict, List[float], List[float]):
    """

    :param model:
    :param fun_data: (mode='all'|'train'|'valid'|'train_valid'|'test',
    fold_valid=0, epoch=0, n_fold_valid=1) -> (data, target)
    :param fun_loss: (out, target) -> loss
    :param plotfuns: [(str, fun)] where fun takes dict d with keys
    'data_*', 'target_*', 'out_*', 'loss_*', where * = 'train', 'valid', etc.
    :param optimizer_kind:
    :param max_epoch:
    :param patience:
    :param thres_patience:
    :param learning_rate:
    :param reduce_lr_by:
    :param reduced_lr_on_epoch:
    :param reduce_lr_after:
    :param to_plot_progress:
    :param show_progress_every:
    :param to_print_grad:
    :param n_fold_valid:
    :param kwargs:
    :return: loss_test, best_state, d, losses_train, losses_valid where d
    contains 'data_*', 'target_*', 'out_*', and 'loss_*', where * is
    'train_valid', 'test', and 'all'.
    """
    def get_optimizer(model, lr):
        if optimizer_kind == 'SGD':
            return optim.SGD(model.parameters(),
                             lr=lr)
        elif optimizer_kind == 'Adam':
            return optim.Adam(model.parameters(),
                              lr=lr)
        elif optimizer_kind == 'LBFGS':
            return optim.LBFGS(model.parameters(),
                               lr=lr)
        else:
            raise NotImplementedError()

    learning_rate0 = learning_rate
    optimizer = get_optimizer(model, learning_rate)

    best_loss_epoch = 0
    best_loss_valid = np.inf
    best_state = model.state_dict()
    best_losses = []

    # CHECKED storing and loading states
    state0 = None
    loss0 = None
    data0 = None
    target0 = None
    out0 = None
    outs0 = None

    def array2str(v):
        return ', '.join(['%1.2g' % v1 for v1 in v.flatten()[:10]])

    def print_targ_out(target0, out0, outs0, loss0):
        print('target:\n' + array2str(target0))
        print('outs:\n' + '\n'.join(
            ['[%s]' % array2str(v) for v in outs0]))
        print('out:\n' + array2str(out0))
        print('loss: ' + '%g' % loss0)

    def fun_outs(model, data):
        p_bef_lapse0 = model.dtb(*data)[0].detach().clone()
        p_aft_lapse0 = model.lapse(p_bef_lapse0).detach().clone()
        return [
            p_bef_lapse0, p_aft_lapse0
        ]

    def are_all_equal(outs, outs0):
        for i, (out1, out0) in enumerate(zip(outs, outs0)):
            if (out1 != out0).any():
                warnings.warn(
                    'output %d different! max diff = %g' %
                    (i, (out1 - out0).abs().max()))
                print('--')

    # losses_train[epoch] = average cross-validated loss for the epoch
    losses_train = []
    losses_valid = []

    if to_plot_progress:
        writer = SummaryWriter(comment=comment)
    t_st = time.time()
    epoch = 0

    try:
        for epoch in range(max([max_epoch, 1])):
            losses_fold_train = []
            losses_fold_valid = []
            for i_fold in range(n_fold_valid):
                # NOTE: Core part
                data_train, target_train = fun_data('train', i_fold, epoch,
                                                    n_fold_valid)
                model.train()
                if optimizer_kind == 'LBFGS':
                    def closure():
                        optimizer.zero_grad()
                        out_train = model(data_train)
                        loss = fun_loss(out_train, target_train)
                        loss.backward()
                        return loss
                    if max_epoch > 0:
                        optimizer.step(closure)
                    out_train = model(data_train)
                    loss_train1 = fun_loss(out_train, target_train)
                    raise NotImplementedError(
                        'Restoring best state is not implemented yet'
                    )
                else:
                    optimizer.zero_grad()
                    out_train = model(data_train)
                    loss_train1 = fun_loss(out_train, target_train)
                    # DEBUGGED: optimizer.step() must not be taken before
                    #  storing best_loss or best_state

                losses_fold_train.append(loss_train1)

                if n_fold_valid == 1:
                    out_valid = npt.tensor(npy(out_train))
                    loss_valid1 = npt.tensor(npy(loss_train1))
                    data_valid = data_train
                    target_valid = target_train

                    # DEBUGGED: Unless directly assigned, target_valid !=
                    #  target_train when n_fold_valid = 1, which doesn't make
                    #  sense. Suggests a bug in fun_data when n_fold = 1
                else:
                    model.eval()
                    data_valid, target_valid = fun_data('valid', i_fold, epoch,
                                                        n_fold_valid)
                    out_valid = model(data_valid)
                    loss_valid1 = fun_loss(out_valid, target_valid)
                    model.train()
                losses_fold_valid.append(loss_valid1)

            loss_train = torch.mean(torch.stack(losses_fold_train))
            loss_valid = torch.mean(torch.stack(losses_fold_valid))
            losses_train.append(npy(loss_train))
            losses_valid.append(npy(loss_valid))

            if to_plot_progress:
                writer.add_scalar(
                    'loss_train', loss_train,
                    global_step=epoch
                )
                writer.add_scalar(
                    'loss_valid', loss_valid,
                    global_step=epoch
                )

            # --- Store best loss
            # NOTE: storing losses/states must happen BEFORE taking a step!
            if loss_valid < best_loss_valid:
                # is_best = True
                best_loss_epoch = deepcopy(epoch)
                best_loss_valid = npt.tensor(npy(loss_valid))
                best_state = model.state_dict()

            best_losses.append(best_loss_valid)

            # CHECKED storing and loading state
            if epoch == epoch_to_check:
                loss0 = loss_valid.detach().clone()
                state0 = model.state_dict()
                data0 = deepcopy(data_valid)
                target0 = deepcopy(target_valid)
                out0 = out_valid.detach().clone()
                outs0 = fun_outs(model, data0)

                loss001 = fun_loss(out0, target0)
                # CHECKED: loss001 must equal loss0
                print('loss001 - loss0: %g' % (loss001 - loss0))

                print_targ_out(target0, out0, outs0, loss0)
                print('--')

            def print_loss():
                t_el = time.time() - t_st
                print('%1.0f sec/%d epochs = %1.1f sec/epoch, Ltrain: %f, '
                      'Lvalid: %f, LR: %g, best: %f, epochB: %d'
                      % (t_el, epoch + 1, t_el / (epoch + 1),
                         loss_train, loss_valid, learning_rate,
                         best_loss_valid, best_loss_epoch))

            if epoch % show_progress_every == 0:
                model.train()
                data_train_valid, target_train_valid = fun_data(
                    'train_valid', i_fold, epoch, n_fold_valid
                )
                out_train_valid = model(data_train_valid)
                loss_train_valid = fun_loss(out_train_valid, target_train_valid)
                print_loss()
                if to_plot_progress:
                    d = {
                        'data_train': data_train,
                        'data_valid': data_valid,
                        'data_train_valid': data_train_valid,
                        'out_train': out_train.detach(),
                        'out_valid': out_valid.detach(),
                        'out_train_valid': out_train_valid.detach(),
                        'target_train': target_train.detach(),
                        'target_valid': target_valid.detach(),
                        'target_train_valid': target_train_valid.detach(),
                        'loss_train': loss_train.detach(),
                        'loss_valid': loss_valid.detach(),
                        'loss_train_valid': loss_train_valid.detach()
                    }

                    for k, f in odict(plotfuns).items():
                        fig, d = f(model, d)
                        if fig is not None:
                            writer.add_figure(k, fig, global_step=epoch)

            # --- Learning rate reduction and patience
            # if epoch == reduced_lr_on_epoch + reset_lr_after
            # if epoch == reduced_lr_on_epoch + reduce_lr_after and (
            #         best_loss_valid
            #         > best_losses[-reduce_lr_after] - thres_patience
            # ):
            if epoch > 0 and epoch % reset_lr_after == 0:
                learning_rate = learning_rate0
            elif epoch > 0 and epoch % reduce_lr_after == 0:
                learning_rate *= reduce_lr_by
                optimizer = get_optimizer(model, learning_rate)
                reduced_lr_on_epoch = epoch

            if epoch >= patience and (
                    best_loss_valid
                    > best_losses[-patience] - thres_patience
            ):
                print('Ran out of patience!')
                if to_print_grad:
                    print_grad(model)
                break

            # --- Take a step
            if optimizer_kind != 'LBFGS':
                # steps are not taken above for n_fold_valid == 1, so take a
                # step here, after storing the best state
                loss_train.backward()
                if to_print_grad and epoch == 0:
                    print_grad(model)
                if max_epoch > 0:
                    optimizer.step()

    except Exception as ex:
        from lib.pylabyk.cacheutil import is_keyboard_interrupt
        if not is_keyboard_interrupt(ex):
            raise ex
        print('fit interrupted by user at epoch %d' % epoch)

        from lib.pylabyk.localfile import LocalFile, datetime4filename
        localfile = LocalFile()
        cache = localfile.get_cache('model_data_target')
        data_train_valid, target_train_valid = fun_data(
            'all', 0, 0, n_fold_valid)
        cache.set({
            'model': model,
            'data_train_valid': data_train_valid,
            'target_train_valid': target_train_valid
        })
        cache.save()

    print_loss()
    if to_plot_progress:
        writer.close()

    if epoch_to_check is not None:
        # Must print the same output as previous call to print_targ_out
        print_targ_out(target0, out0, outs0, loss0)

        model.load_state_dict(state0)
        state1 = model.state_dict()
        for (key0, param0), (key1, param1) in zip(
                state0.items(), state1.items()
        ):  # type: ((str, torch.Tensor), (str, torch.Tensor))
            if (param0 != param1).any():
                with torch.no_grad():
                    warnings.warn(
                        'Strange! loaded %s = %s\n'
                        '!= stored %s = %s\n'
                        'loaded - stored = %s'
                        % (key1, param1, key0, param0, param1 - param0))
        data, target = fun_data('valid', 0, epoch_to_check, n_fold_valid)

        if not torch.is_tensor(data):
            p_unequal = torch.tensor([
                (v1 != v0).double().mean() for v1, v0
                in zip(data, data0)
            ])
            if (p_unequal > 0).any():
                print('Strange! loaded data != stored data0\n'
                      'Proportion: %s' % p_unequal)
            else:
                print('All loaded data == stored data')
        elif (data != data0).any():
            print('Strange! loaded data != stored data0')
        else:
            print('All loaded data == stored data')

        if (target != target0).any():
            print('Strange! loaded target != stored target0')
        else:
            print('All loaded target == stored target')

        print_targ_out(target0, out0, outs0, loss0)

        # with torch.no_grad():
        #     out01 = model(data0)
        #     loss01 = fun_loss(out01, target0)
        model.train()
        # with torch.no_grad():
        # CHECKED
        # outs1 = fun_outs(model, data)
        # are_all_equal(outs1, outs0)

        out1 = model(data)
        if (out0 != out1).any():
            warnings.warn(
                'Strange! out from loaded params != stored out\n'
                'Max abs(loaded - stored): %g' %
                (out1 - out0).abs().max())
            print('--')
        else:
            print('out from loaded params = stored out')

        loss01 = fun_loss(out0, target0)
        print_targ_out(target0, out0, outs0, loss01)

        if loss0 != loss01:
            warnings.warn(
                'Strange!  loss01 = %g, simply computed again with out0, '
                'target0\n'
                '!= stored loss0 = %g\n'
                'loaded - stored:  %g\n'
                'Therefore, fun_loss, out0, or target0 has changed!' %
                (loss01, loss0, loss01 - loss0))
            print('--')
        else:
            print('loss0 == loss01, simply computed again with out0, target0')

        loss1 = fun_loss(out1, target)
        if loss0 != loss1:
            warnings.warn(
                'Strange!  loss1 = %g from loaded params\n'
                '!= stored loss0 = %g\n'
                'loaded - stored:  %g' %
                (loss1, loss0, loss1 - loss0))
            print('--')
        else:
            print('loss1 = %g = loss0 = %g' % (loss1, loss0))

        loss10 = fun_loss(out1, target0)
        if loss0 != loss10:
            warnings.warn(
                'Strange!  loss10 = %g from loaded params and stored '
                'target0\n'
                '!= stored loss0 = %g\n'
                'loaded - stored:  %g' %
                (loss10, loss0, loss10 - loss0))
            print('--')
        else:
            print('loss10 = %g = loss0 = %g' % (loss10, loss0))
        print('--')

    model.load_state_dict(best_state)

    d = {}
    for mode in ['train_valid', 'valid', 'test', 'all']:
        data, target = fun_data(mode, 0, 0, n_fold_valid)
        out = model(data)
        loss = fun_loss(out, target)
        d.update({
            'data_' + mode: data,
            'target_' + mode: target,
            'out_' + mode: npt.tensor(npy(out)),
            'loss_' + mode: npt.tensor(npy(loss))
        })

    if d['loss_valid'] != best_loss_valid:
        print('d[loss_valid]      = %g from loaded best_state \n'
              '!= best_loss_valid = %g\n'
              'd[loss_valid] - best_loss_valid = %g' %
              (d['loss_valid'], best_loss_valid,
               d['loss_valid'] - best_loss_valid))
        print('--')

    if isinstance(model, OverriddenParameter):
        print(model.__str__())
    elif isinstance(model, BoundedModule):
        pprint(model._parameters_incl_bounded)
    else:
        pprint(model.state_dict())

    return d['loss_test'], best_state, d, losses_train, losses_valid
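A hypothetical end-to-end wiring of optimize for a toy linear regression, only to illustrate the shape of the fun_data and fun_loss callables; the model, data, and settings below are stand-ins and not from the source (plot functions are omitted and progress plotting is disabled):

import torch
from torch import nn

model = nn.Linear(3, 1)
X, y = torch.randn(100, 3), torch.randn(100, 1)

def fun_data(mode='all', fold_valid=0, epoch=0, n_fold_valid=1):
    # The same tensors are returned for every mode in this sketch.
    return X, y

def fun_loss(out, target):
    return ((out - target) ** 2).mean()

loss_test, best_state, d, losses_train, losses_valid = optimize(
    model, fun_data, fun_loss, plotfuns={},
    optimizer_kind='Adam', max_epoch=50, learning_rate=0.01,
    to_plot_progress=False, to_print_grad=False)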