Example #1
    def partial_EM(self, data, cond_muh_ijk, indices, weights=None, eps=1e-4, maxiter=10, verbose=0):
        (i, j, k) = indices
        converged = False
        previous_L = utilities.average(
            self.likelihood(data), weights=weights) / self.N
        mini_epochs = 0
        if verbose:
            print('Partial EM %s, L = %.3f' % (mini_epochs, previous_L))
        while not converged:
            if self.nature in ['Bernoulli', 'Spin']:
                f = np.dot(data, self.weights[[i, j, k], :].T)
            elif self.nature == 'Potts':
                f = cy_utilities.compute_output_C(data, self.weights[[i, j, k], :, :], np.zeros([
                                                  data.shape[0], 3], dtype=curr_float))

            tmp = f - self.logZ[np.newaxis, [i, j, k]]
            tmp -= tmp.max(-1)[:, np.newaxis]
            cond_muh = np.exp(tmp) * self.muh[np.newaxis, [i, j, k]]
            cond_muh /= cond_muh.sum(-1)[:, np.newaxis]
            cond_muh *= cond_muh_ijk[:, np.newaxis]

            self.muh[[i, j, k]] = utilities.average(cond_muh, weights=weights)
            self.cum_muh = np.cumsum(self.muh)
            self.gh[[i, j, k]] = np.log(self.muh[[i, j, k]])
            self.gh -= self.gh.mean()
            if self.nature == 'Bernoulli':
                self.cond_muv[[i, j, k]] = utilities.average_product(
                    cond_muh, data, mean1=True, weights=weights) / self.muh[[i, j, k], np.newaxis]
                self.weights[[i, j, k]] = np.log(
                    (self.cond_muv[[i, j, k]] + eps) / (1 - self.cond_muv[[i, j, k]] + eps))
                self.logZ[[i, j, k]] = np.logaddexp(
                    0, self.weights[[i, j, k]]).sum(-1)
            elif self.nature == 'Spin':
                self.cond_muv[[i, j, k]] = utilities.average_product(
                    cond_muh, data, mean1=True, weights=weights) / self.muh[[i, j, k], np.newaxis]
                self.weights[[i, j, k]] = 0.5 * np.log(
                    (1 + self.cond_muv[[i, j, k]] + eps) / (1 - self.cond_muv[[i, j, k]] + eps))
                self.logZ[[i, j, k]] = np.logaddexp(
                    self.weights[[i, j, k]], -self.weights[[i, j, k]]).sum(-1)
            elif self.nature == 'Potts':
                self.cond_muv[[i, j, k]] = utilities.average_product(
                    cond_muh, data, c2=self.n_c, mean1=True, weights=weights) / self.muh[[i, j, k], np.newaxis, np.newaxis]
                self.cum_cond_muv[[i, j, k]] = np.cumsum(
                    self.cond_muv[[i, j, k]], axis=-1)
                self.weights[[i, j, k]] = np.log(
                    self.cond_muv[[i, j, k]] + eps)
                self.weights[[i, j, k]] -= self.weights[[i, j, k]].mean(-1)[:, :, np.newaxis]
                self.logZ[[i, j, k]] = utilities.logsumexp(
                    self.weights[[i, j, k]], axis=-1).sum(-1)

            current_L = utilities.average(
                self.likelihood(data), weights=weights) / self.N
            mini_epochs += 1
            converged = (mini_epochs >= maxiter) | (
                np.abs(current_L - previous_L) < eps)
            previous_L = current_L.copy()
            if verbose:
                print('Partial EM %s, L = %.3f' % (mini_epochs, current_L))
        return current_L
Example #2
    def split_merge_criterion(self, data, Cmax=5, weights=None):
        likelihood, cond_muh = self.likelihood_and_expectation(data)
        norm = np.sqrt(utilities.average(cond_muh**2, weights=weights))
        J_merge = utilities.average_product(
            cond_muh, cond_muh, weights=weights) / (1e-10 + norm[np.newaxis, :] * norm[:, np.newaxis])
        J_merge = np.triu(J_merge, 1)
        proposed_merge = np.argsort(J_merge.flatten())[::-1][:Cmax]
        proposed_merge = [(merge % self.M, merge // self.M)
                          for merge in proposed_merge]

        tmp = cond_muh / self.muh[np.newaxis, :]

        if weights is None:
            J_split = np.array(
                [utilities.average(likelihood, weights=tmp[:, m]) for m in range(self.M)])
        else:
            J_split = np.array([utilities.average(
                likelihood, weights=tmp[:, m] * weights) for m in range(self.M)])

        proposed_split = np.argsort(J_split)[:3]
        proposed_merge_split = []
        for merge1, merge2 in proposed_merge:
            if proposed_split[0] in [merge1, merge2]:
                if proposed_split[1] in [merge1, merge2]:
                    proposed_merge_split.append(
                        (merge1, merge2, proposed_split[2]))
                else:
                    proposed_merge_split.append(
                        (merge1, merge2, proposed_split[1]))
            else:
                proposed_merge_split.append(
                    (merge1, merge2, proposed_split[0]))
        return proposed_merge_split
Example #3
def get_cross_derivatives_Gaussian(V_pos, psi_pos, hlayer, n_cv, weights=None):
    db_dw = average(V_pos, c=n_cv, weights=weights)
    da_db = np.zeros(hlayer.N)
    WChat = covariance(psi_pos, V_pos, weights=weights, c1=1, c2=n_cv)
    var_e = average(psi_pos**2, weights=weights) - \
        average(psi_pos, weights=weights)**2
    if n_cv > 1:
        da_dw = 2 / np.sqrt(1 + 4 * var_e)[:, np.newaxis, np.newaxis] * WChat
    else:
        da_dw = 2 / np.sqrt(1 + 4 * var_e)[:, np.newaxis] * WChat
    return db_dw, da_db, da_dw
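The only step above that is easy to misread is the broadcast of the per-unit factor 2 / sqrt(1 + 4 * var_e) against the (M, N, n_cv) covariance tensor. A minimal self-contained sketch of that broadcasting, using hypothetical toy shapes instead of the project's layer objects:

import numpy as np

# Hypothetical shapes: M hidden units, N visible sites, n_cv categories.
M, N, n_cv = 4, 7, 3
var_e = np.random.rand(M)             # stands in for the per-unit variance of psi_pos
WChat = np.random.randn(M, N, n_cv)   # stands in for the psi/visible covariance tensor

# Same broadcasting as the n_cv > 1 branch above: (M,) -> (M, 1, 1) -> (M, N, n_cv).
da_dw = 2 / np.sqrt(1 + 4 * var_e)[:, np.newaxis, np.newaxis] * WChat
assert da_dw.shape == (M, N, n_cv)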
Example #4
def likelihood(model,
               data,
               data_test,
               weights=None,
               weights_test=None,
               n_betas_AIS=20000,
               M_AIS=10):
    model.AIS(n_betas=n_betas_AIS, M=M_AIS, beta_type='linear')
    l = utilities.average(model.likelihood(data), weights=weights)
    l_test = utilities.average(model.likelihood(data_test),
                               weights=weights_test)
    return [l, l_test]
Example #5
def auto_correl(data, nmax=None, nature='Bernoulli', n_c=1):
    B = data.shape[0]
    L = data.shape[1]
    N = data.shape[2]
    if nmax is None:
        nmax = L // 2

    if n_c == 1:
        data_hat = np.fft.fft(np.real(data), axis=1)
        C = np.real(np.fft.ifft(np.abs(data_hat)**2,
                                axis=1)).mean(0).mean(-1) / float(L)
        mu = data.mean(0).mean(0)
        if nature == 'Bernoulli':
            C_hat = 1 + 2 * C - 2 * mu.mean() - (mu**2 + (1 - mu)**2).mean()
        elif nature == 'Spin':
            C_hat = (1 + C) / 2 - (((1 + mu) / 2)**2 +
                                   ((1 - mu) / 2)**2).mean()
        return C_hat[:nmax] / C_hat[0]
    else:
        C = np.zeros(L)
        mu = utilities.average(data.reshape([B * L, N]), c=n_c)
        for c in range(n_c):
            data_ = (data == c)
            data_hat = np.fft.fft(np.real(data_), axis=1)
            C += np.real(np.fft.ifft(np.abs(data_hat)**2,
                                     axis=1)).mean(0).mean(-1) / float(L)
        C_hat = C - (mu**2).mean() * n_c
        return C_hat[:nmax] / C_hat[0]
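A minimal usage sketch for the binary (n_c == 1) branch, assuming only numpy and the auto_correl function defined above; the expected data layout is (B samples, L time steps, N units), and the returned curve is normalized to 1 at lag 0:

import numpy as np

B, L, N = 20, 64, 10
data = (np.random.rand(B, L, N) > 0.5).astype(float)  # synthetic Bernoulli trajectories
corr = auto_correl(data, nmax=L // 2, nature='Bernoulli', n_c=1)
print(corr.shape, corr[0])  # (32,), 1.0 at lag 0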
Example #6
    def symKL(self, PGM, data, weights=None):
        n_samples = data.shape[0]
        data_moi, _ = self.gen_data(n_samples)
        D = -utilities.average(self.likelihood(data) + PGM.free_energy(data), weights=weights) + (
            self.likelihood(data_moi) + PGM.free_energy(data_moi)).mean()
        D /= self.N
        return D
Example #7
def weights_to_couplings_approx(RBM, data, weights=None):
    psi = RBM.vlayer.compute_output(data, RBM.weights)
    var = RBM.hlayer.var_from_inputs(psi)
    mean_var = utilities.average(var, weights=weights)
    J_eff = np.tensordot(RBM.weights,
                         RBM.weights * mean_var[:, np.newaxis, np.newaxis],
                         axes=[0, 0])
    J_eff = np.swapaxes(J_eff, 1, 2)
    return J_eff
Example #8
def print_wait_times(wait_times, string):
    print('Number of ' + string + ' in queue: ' +
          str(len(wait_times)) + ' player.')
    print('Average current ' + string + ' wait time: ' +
          str(average(wait_times)) + ' minutes')
    print('Median current ' + string + ' wait time: ' +
          str(median(wait_times)) + ' minutes')
    print('Modal current ' + string + ' wait time: ' +
          str(mode(wait_times)) + ' minutes')
    print('Max current ' + string + ' wait time: ' +
          str(max(wait_times)) + ' minutes')
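A hedged usage sketch; it assumes the statistics helpers used by print_wait_times come from Python's standard library (an assumption, since the imports are not shown in this snippet):

from statistics import mean as average, median, mode  # assumed source of the helpers

wait_times = [3, 5, 5, 8, 12]  # hypothetical current wait times in minutes
print_wait_times(wait_times, 'DPS')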
Example #9
def get_inception_score(data_gen,
                        dataset,
                        weights=None,
                        path_data='data/',
                        path_classifiers='classifiers/',
                        M=20,
                        eps=1e-10):
    try:
        classifier = pickle.load(
            open(path_classifiers + '%s_Classifier.data' % dataset,
                 'rb'))['classifier']
    except Exception:
        print('Learning a classifier first...')
        train_env = {}
        exec('dataset_utils.load_%s(train_env,path=path_data)' % dataset)
        if 'train_labels' in train_env.keys():
            classifier = LogisticRegressionCV(n_jobs=5,
                                              multi_class='multinomial')
            classifier.fit(train_env['train_data'], train_env['train_labels'])
        else:
            nature, N, n_c = dataset_utils.infer_type_data(
                train_env['train_data'])
            classifier = moi.MoI(nature=nature, N=N, M=M, n_c=n_c)
            classifier.fit(train_env['train_data'],
                           verbose=0,
                           weights=train_env['train_weights'])
        pickle.dump({'classifier': classifier},
                    open(path_classifiers + '%s_Classifier.data' % dataset,
                         'wb'))
    if hasattr(classifier, 'predict_proba'):
        probas = classifier.predict_proba(data_gen)
    elif hasattr(classifier, 'expectation'):
        probas = classifier.expectation(data_gen)
    else:
        print('No expectation or predict_proba from classifier')
        return
    proba_av = utilities.average(probas, weights=weights)
    scores = (probas * np.log((probas + eps) / (proba_av + eps))).sum(-1)
    inception_score = np.exp(utilities.average(scores, weights=weights))
    return inception_score
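The last three lines implement the standard inception-score construction: the exponential of the (weighted) mean KL divergence between each sample's class posterior and the marginal class distribution. A self-contained numpy sketch of just that computation (unweighted case) on a hypothetical 4-sample, 3-class posterior matrix:

import numpy as np

eps = 1e-10
probas = np.array([[0.9, 0.05, 0.05],   # hypothetical p(y | x) for 4 generated samples
                   [0.1, 0.80, 0.10],
                   [0.2, 0.20, 0.60],
                   [0.3, 0.40, 0.30]])
proba_av = probas.mean(0)                                              # marginal p(y)
scores = (probas * np.log((probas + eps) / (proba_av + eps))).sum(-1)  # per-sample KL
inception_score = np.exp(scores.mean())  # higher = sharper and more diverse posteriors
print(inception_score)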
Example #10
def get_hidden_input(data, RBM, normed=False, offset=True):
    if normed:
        mu = utilities.average(data, c=21)
        norm_null = np.sqrt(((RBM.weights**2 * mu).sum(-1) -
                             (RBM.weights * mu).sum(-1)**2).sum(-1))
        return (RBM.vlayer.compute_output(data, RBM.weights) -
                RBM.hlayer.b[np.newaxis, :]) / norm_null[np.newaxis, :]
    else:
        if offset:
            return (RBM.vlayer.compute_output(data, RBM.weights) -
                    RBM.hlayer.b[np.newaxis, :])
        else:
            return (RBM.vlayer.compute_output(data, RBM.weights))
Example #11
    def minibatch_fit(self, data, weights=None, eps=1e-5, update=True):
        h = self.expectation(data)
        self.muh = self.learning_rate * \
            utilities.average(h, weights=weights) + \
            (1 - self.learning_rate) * self.muh
        self.cum_muh = np.cumsum(self.muh)
        if update:
            self.gh = np.log(self.muh + eps)
            self.gh -= self.gh.mean()
        if self.nature == 'Bernoulli':
            self.muvh = self.learning_rate * \
                utilities.average_product(
                    h, data, weights=weights) + (1 - self.learning_rate) * self.muvh
            if update:
                self.cond_muv = self.muvh / (self.muh[:, np.newaxis])
                self.weights = np.log(
                    (self.cond_muv + eps) / (1 - self.cond_muv + eps))
        elif self.nature == 'Spin':
            self.muvh = self.learning_rate * \
                utilities.average_product(h, data, weights=weights) + \
                (1 - self.learning_rate) * self.muvh
            if update:
                self.cond_muv = self.muvh / self.muh[:, np.newaxis]
                self.weights = 0.5 * \
                    np.log((1 + self.cond_muv + eps) /
                           (1 - self.cond_muv + eps))
        else:
            self.muvh = self.learning_rate * utilities.average_product(
                h, data, c2=self.n_c, weights=weights) + (1 - self.learning_rate) * self.muvh
            if update:
                self.cond_muv = self.muvh / self.muh[:, np.newaxis, np.newaxis]
                self.weights = np.log(self.cond_muv + eps)
                self.weights -= self.weights.mean(-1)[:, :, np.newaxis]

        if update:
            self.logpartition()
Example #12
    def minibatch_fit_symKL(self, data_PGM, PGM=None, data_MOI=None, F_PGM_dPGM=None, F_PGM_dMOI=None, F_MOI_dPGM=None, F_MOI_dMOI=None, cond_muh_dPGM=None, cond_muh_dMOI=None, weights=None):
        if data_MOI is None:
            data_MOI, _ = self.gen_data(data_PGM.shape[0])
        if F_PGM_dPGM is None:
            F_PGM_dPGM = PGM.free_energy(data_PGM)
        if F_PGM_dMOI is None:
            F_PGM_dMOI = PGM.free_energy(data_MOI)
        if (F_MOI_dPGM is None) | (cond_muh_dPGM is None):
            F_MOI_dPGM, cond_muh_dPGM = self.likelihood_and_expectation(
                data_PGM)
            F_MOI_dPGM *= -1
        if (F_MOI_dMOI is None) | (cond_muh_dMOI is None):
            F_MOI_dMOI, cond_muh_dMOI = self.likelihood_and_expectation(
                data_MOI)
            F_MOI_dMOI *= -1

        delta_lik = -F_PGM_dMOI + F_MOI_dMOI
        delta_lik -= delta_lik.mean()

        self.gradient = {}
        self.gradient['gh'] = utilities.average(
            cond_muh_dPGM, weights=weights) - self.muh + (delta_lik[:, np.newaxis] * cond_muh_dMOI).mean(0)
        if self.nature in ['Bernoulli', 'Spin']:
            self.gradient['weights'] = utilities.average_product(
                cond_muh_dPGM, data_PGM, mean1=True, weights=weights) + utilities.average_product(cond_muh_dMOI * delta_lik[:, np.newaxis], data_MOI, mean1=True)
            self.gradient['weights'] -= self.muh[:, np.newaxis] * self.cond_muv
        elif self.nature == 'Potts':
            self.gradient['weights'] = utilities.average_product(cond_muh_dPGM, data_PGM, mean1=True, c2=self.n_c, weights=weights) + utilities.average_product(
                cond_muh_dMOI * delta_lik[:, np.newaxis], data_MOI, mean1=True, c2=self.n_c)
            self.gradient['weights'] -= self.muh[:,
                                                 np.newaxis, np.newaxis] * self.cond_muv

        self.gh += self.learning_rate * self.gradient['gh']
        self.weights += self.learning_rate * self.gradient['weights']

        self.muh = np.exp(self.gh)
        self.muh /= self.muh.sum()
        self.cum_muh = np.cumsum(self.muh)
        if self.nature == 'Bernoulli':
            self.cond_muv = utilities.logistic(self.weights)
        elif self.nature == 'Spin':
            self.cond_muv = np.tanh(self.weights)
        elif self.nature == 'Potts':
            self.weights -= self.weights.mean(-1)[:, :, np.newaxis]
            self.cond_muv = np.exp(self.weights)
            self.cond_muv /= self.cond_muv.sum(-1)[:, :, np.newaxis]
            self.cum_cond_muv = np.cumsum(self.cond_muv, axis=-1)
        self.logpartition()
Example #13
    def maximization(self, data, cond_muh, weights=None, eps=1e-6):
        self.muh = utilities.average(cond_muh, weights=weights)
        self.cum_muh = np.cumsum(self.muh)
        self.gh = np.log(self.muh)
        self.gh -= self.gh.mean()
        if self.nature == 'Bernoulli':
            self.cond_muv = utilities.average_product(
                cond_muh, data, mean1=True, weights=weights) / self.muh[:, np.newaxis]
            self.weights = np.log((self.cond_muv + eps) /
                                  (1 - self.cond_muv + eps))
        elif self.nature == 'Spin':
            self.cond_muv = utilities.average_product(
                cond_muh, data, mean1=True, weights=weights) / self.muh[:, np.newaxis]
            self.weights = 0.5 * \
                np.log((1 + self.cond_muv + eps) / (1 - self.cond_muv + eps))

        elif self.nature == 'Potts':
            self.cond_muv = utilities.average_product(
                cond_muh, data, c2=self.n_c, mean1=True, weights=weights) / self.muh[:, np.newaxis, np.newaxis]
            self.cum_cond_muv = np.cumsum(self.cond_muv, axis=-1)
            self.weights = np.log(self.cond_muv + eps)
            self.weights -= self.weights.mean(-1)[:, :, np.newaxis]
        self.logpartition()
Example #14
def _Ker_weights_to_couplings_exact(x, RBM, data, weights=None, nbins=10):
    N = RBM.n_v
    M = RBM.n_h
    c = RBM.n_cv
    Jij = np.zeros([c, c])
    i = x // N
    j = x % N
    L = layer.Layer(N=1, nature=RBM.hidden)
    tmpW = RBM.weights.copy()
    subsetW = tmpW[:, [i, j], :].copy()
    tmpW[:, [i, j], :] *= 0
    psi_restr = RBM.vlayer.compute_output(data, tmpW)
    for m in range(M):
        count, hist = np.histogram(psi_restr[:, m],
                                   bins=nbins,
                                   weights=weights)
        hist = (hist[:-1] + hist[1:]) / 2
        hist_mod = (hist[:, np.newaxis, np.newaxis] +
                    subsetW[m, 0][np.newaxis, :, np.newaxis] +
                    subsetW[m, 1][np.newaxis, np.newaxis, :]).reshape(
                        [nbins * c**2, 1])
        if RBM.hidden == 'Gaussian':
            L.a[0] = RBM.hlayer.a[m]
            L.b[0] = RBM.hlayer.b[m]
        elif RBM.hidden == 'dReLU':
            L.a_plus[0] = RBM.hlayer.a_plus[m]
            L.a_minus[0] = RBM.hlayer.a_minus[m]
            L.theta_plus[0] = RBM.hlayer.theta_plus[m]
            L.theta_minus[0] = RBM.hlayer.theta_minus[m]
        Phi = utilities.average(L.logpartition(hist_mod).reshape([nbins, c,
                                                                  c]),
                                weights=count)
        Jij += (Phi[:, :, np.newaxis, np.newaxis] +
                Phi[np.newaxis, np.newaxis, :, :] -
                Phi[np.newaxis, :, :, np.newaxis].T -
                Phi[:, np.newaxis, np.newaxis, :]).sum(-1).sum(-1) / c**2
    return Jij
Example #15
File: bm.py Project: jertubiana/PGM
    def fit(self,
            data,
            batch_size=100,
            nchains=100,
            learning_rate=None,
            extra_params=None,
            init='independent',
            optimizer='SGD',
            N_PT=1,
            N_MC=1,
            n_iter=10,
            lr_decay=True,
            lr_final=None,
            decay_after=0.5,
            l1=0,
            l1b=0,
            l1c=0,
            l2=0,
            l2_fields=0,
            no_fields=False,
            batch_norm=False,
            update_betas=None,
            record_acceptance=None,
            epsilon=1e-6,
            verbose=1,
            record=[],
            record_interval=100,
            p=[1, 0, 0],
            pseudo_count=0,
            weights=None):

        self.nchains = nchains
        self.optimizer = optimizer
        self.record_swaps = False
        self.batch_norm = batch_norm
        self.layer.batch_norm = batch_norm

        self.n_iter = n_iter

        if learning_rate is None:
            if self.nature in ['Bernoulli', 'Spin', 'Potts']:
                learning_rate = 0.1
            else:
                learning_rate = 0.01

            if self.optimizer == 'ADAM':
                learning_rate *= 0.1

        self.learning_rate = learning_rate
        self.lr_decay = lr_decay
        if self.lr_decay:
            self.decay_after = decay_after
            self.start_decay = self.n_iter * self.decay_after
            if lr_final is None:
                self.lr_final = 1e-2 * self.learning_rate
            else:
                self.lr_final = lr_final
            self.decay_gamma = (float(self.lr_final) /
                                float(self.learning_rate))**(
                                    1 / float(self.n_iter *
                                              (1 - self.decay_after)))

        self.gradient = self.initialize_gradient_dictionary()

        if self.optimizer == 'momentum':
            if extra_params is None:
                extra_params = 0.9
            self.momentum = extra_params
            self.previous_update = self.initialize_gradient_dictionary()

        elif self.optimizer == 'ADAM':
            if extra_params is None:
                extra_params = [0.9, 0.999, 1e-8]
            self.beta1 = extra_params[0]
            self.beta2 = extra_params[1]
            self.epsilon = extra_params[2]

            self.gradient_moment1 = self.initialize_gradient_dictionary()
            self.gradient_moment2 = self.initialize_gradient_dictionary()

        if weights is not None:
            weights = np.asarray(weights, dtype=float)

        mean = utilities.average(data, c=self.n_c, weights=weights)
        covariance = utilities.average_product(data,
                                               data,
                                               c1=self.n_c,
                                               c2=self.n_c,
                                               weights=weights)
        if pseudo_count > 0:
            p = data.shape[0] / float(data.shape[0] + pseudo_count)
            covariance = p**2 * covariance + p * \
                (1 - p) * (mean[np.newaxis, :, np.newaxis, :] * mean[:,
                                                                     np.newaxis, :, np.newaxis]) / self.n_c + (1 - p)**2 / self.n_c**2
            mean = p * mean + (1 - p) / self.n_c

        iter_per_epoch = data.shape[0] // batch_size
        if init != 'previous':
            norm_init = 0
            self.init_couplings(norm_init)
            if init == 'independent':
                self.layer.init_params_from_data(data,
                                                 eps=epsilon,
                                                 value='data')

        self.N_PT = N_PT
        self.N_MC = N_MC

        self.l1 = l1
        self.l1b = l1b
        self.l1c = l1c
        self.l2 = l2
        self.tmp_l2_fields = l2_fields
        self.no_fields = no_fields

        if self.N_PT > 1:
            if record_acceptance is None:
                record_acceptance = True
            self.record_acceptance = record_acceptance

            if update_betas is None:
                update_betas = True

            self._update_betas = update_betas

            if self.record_acceptance:
                self.mavar_gamma = 0.95
                self.acceptance_rates = np.zeros(N_PT - 1)
                self.mav_acceptance_rates = np.zeros(N_PT - 1)
            self.count_swaps = 0

            if self._update_betas:
                record_acceptance = True
                self.update_betas_lr = 0.1
                self.update_betas_lr_decay = 1

            if self._update_betas | (not hasattr(self, 'betas')):
                self.betas = np.arange(N_PT) / float(N_PT - 1)
                self.betas = self.betas[::-1]
            if (len(self.betas) != N_PT):
                self.betas = np.arange(N_PT) / float(N_PT - 1)
                self.betas = self.betas[::-1]

        if self.nature == 'Potts':
            (self.fantasy_x,
             self.fantasy_fields_eff) = self.layer.sample_from_inputs(np.zeros(
                 [self.N_PT * self.nchains, self.N, self.n_c]),
                                                                      beta=0)
        else:
            (self.fantasy_x,
             self.fantasy_fields_eff) = self.layer.sample_from_inputs(np.zeros(
                 [self.N_PT * self.nchains, self.N]),
                                                                      beta=0)
        if self.N_PT > 1:
            self.fantasy_x = self.fantasy_x.reshape(
                [self.N_PT, self.nchains, self.N])
            if self.nature == 'Potts':
                self.fantasy_fields_eff = self.fantasy_fields_eff.reshape(
                    [self.N_PT, self.nchains, self.N, self.n_c])
            else:
                self.fantasy_fields_eff = self.fantasy_fields_eff.reshape(
                    [self.N_PT, self.nchains, self.N])
            self.fantasy_E = np.zeros([self.N_PT, self.nchains])

        self.count_updates = 0
        if verbose:
            if weights is not None:
                lik = (self.pseudo_likelihood(data) *
                       weights).sum() / weights.sum()
            else:
                lik = self.pseudo_likelihood(data).mean()
            print('Iteration number 0, pseudo-likelihood: %.2f' % lik)

        result = {}
        if 'J' in record:
            result['J'] = []
        if 'F' in record:
            result['F'] = []

        count = 0

        for epoch in range(1, n_iter + 1):
            if verbose:
                begin = time.time()
            if self.lr_decay:
                if (epoch > self.start_decay):
                    self.learning_rate *= self.decay_gamma

            print('Starting epoch %s' % (epoch))
            for _ in range(iter_per_epoch):
                self.minibatch_fit(mean, covariance)

                if (count % record_interval == 0):
                    if 'J' in record:
                        result['J'].append(self.layer.couplings.copy())
                    if 'F' in record:
                        result['F'].append(self.layer.fields.copy())

                count += 1

            if verbose:
                end = time.time()
                if weights is not None:
                    lik = (self.pseudo_likelihood(data) *
                           weights).sum() / weights.sum()
                else:
                    lik = self.pseudo_likelihood(data).mean()

                print("[%s] Iteration %d, pseudo-likelihood = %.2f,"
                      " time = %.2fs" %
                      (type(self).__name__, epoch, lik, end - begin))

        return result
Example #16
    def fit(self, data, weights=None, init_bias=0.1, verbose=1, eps=1e-5, maxiter=100, split_merge=True):
        # B = data.shape[0]
        initial_centroids = KMPP_choose_centroids(
            data, self.M, verbose=verbose)
        # initial_centroids = np.argsort(np.random.rand(B))[:self.M]
        if self.nature == 'Bernoulli':
            self.weights += init_bias / self.N * \
                (data[initial_centroids] - 0.5)
        elif self.nature == 'Spin':
            self.weights += 0.25 * init_bias / self.N * data[initial_centroids]
        elif self.nature == 'Potts':
            self.weights += init_bias / self.N * \
                binarize(data[initial_centroids], self.n_c) - \
                init_bias / (self.n_c * self.N)

        n_epoch = 0
        converged = (n_epoch >= maxiter)  # if nothing...
        previous_L = utilities.average(
            self.likelihood(data), weights=weights) / self.N
        current_L = previous_L.copy()

        if self.M < 3:
            split_merge = False

        if verbose:
            print('Iteration 0, L = %.3f' % current_L)

        while not converged:
            cond_muh = self.expectation(data)
            self.maximization(data, cond_muh, weights=weights)
            previous_L = current_L.copy()
            current_L = utilities.average(
                self.likelihood(data), weights=weights) / self.N
            n_epoch += 1
            converged = (n_epoch >= maxiter) | (
                np.abs(current_L - previous_L) < eps)
            if verbose:
                print('Iteration %s, L = %.3f' % (n_epoch, current_L))

        if split_merge:
            converged2 = False
            while not converged2:
                current_weights = self.weights.copy()
                current_cond_muv = self.cond_muv.copy()
                current_gh = self.gh.copy()
                current_muh = self.muh.copy()
                # current_cum_muh = self.cum_muh.copy()
                current_logZ = self.logZ.copy()
                if self.nature == 'Potts':
                    current_cum_cond_muv = self.cum_cond_muv.copy()
                previous_L = current_L.copy()

                current_cond_muh = self.expectation(data)
                proposed_merge_splits = self.split_merge_criterion(
                    data, Cmax=5, weights=weights)
                for proposed_merge_split in proposed_merge_splits:
                    self.merge_split(proposed_merge_split)
                    proposed_L = self.partial_EM(data, current_cond_muh[:, proposed_merge_split].sum(
                        -1), proposed_merge_split, weights=weights, eps=eps, maxiter=10, verbose=verbose)
                    converged3 = False
                    while not converged3:
                        cond_muh = self.expectation(data)
                        self.maximization(data, cond_muh, weights=weights)
                        previous_proposed_L = proposed_L.copy()
                        proposed_L = utilities.average(
                            self.likelihood(data), weights=weights) / self.N
                        n_epoch += 1
                        converged3 = (n_epoch >= maxiter) | (
                            np.abs(proposed_L - previous_proposed_L) < eps)
                    if proposed_L - current_L > eps:
                        current_L = proposed_L.copy()
                        if verbose:
                            print('Iteration %s, Split-Merge (%s,%s,%s) accepted, L = %.3f' % (
                                n_epoch, proposed_merge_split[0], proposed_merge_split[1], proposed_merge_split[2], current_L))
                        break
                    else:
                        self.weights = current_weights.copy()
                        self.cond_muv = current_cond_muv.copy()
                        self.gh = current_gh.copy()
                        self.muh = current_muh.copy()
                        self.cum_muh = np.cumsum(self.muh)
                        self.logZ = current_logZ.copy()
                        if self.nature == 'Potts':
                            self.cum_cond_muv = current_cum_cond_muv.copy()

                        if verbose:
                            print('Iteration %s, Split-Merge (%s,%s,%s) denied, Proposed L = %.3f' % (
                                n_epoch, proposed_merge_split[0], proposed_merge_split[1], proposed_merge_split[2], proposed_L))
                converged2 = (np.abs(current_L - previous_L) <
                              eps) | (n_epoch >= 2 * maxiter)
        return current_L
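A hedged usage sketch of the EM fit above, assuming the project's moi module is importable; the constructor arguments mirror the moi.MoI(nature=..., N=..., M=..., n_c=...) call shown in Example #9, and the data are synthetic:

import numpy as np
import moi  # assumes the PGM project's moi module is on the path

data = (np.random.rand(1000, 50) > 0.5).astype(np.float32)  # synthetic binary samples
model = moi.MoI(nature='Bernoulli', N=data.shape[1], M=10, n_c=1)
L = model.fit(data, verbose=1, maxiter=100)  # returns the final per-site log-likelihood
print('Final L = %.3f' % L)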
Example #17
    def fit_online(self, data, weights=None, batch_size=100, learning_rate=0.01, lr_final=None, n_iter=10, lr_decay=True, decay_after=0.5, verbose=1, shuffle_data=True, print_every=5, init_bias=0.001, init=None):
        n_samples = data.shape[0]
        n_batches = int(np.ceil(float(n_samples) / batch_size))
        batch_slices = list(utilities.gen_even_slices(n_batches * batch_size,
                                                      n_batches, n_samples))

        # learning_rate_init = copy.copy(learning_rate)
        self.learning_rate = learning_rate
        if lr_decay:
            start_decay = n_iter * decay_after
            if lr_final is None:
                lr_final = 1e-2 * learning_rate

            decay_gamma = (float(lr_final) / float(learning_rate)
                           )**(1 / float(n_iter * (1 - decay_after)))

        B = data.shape[0]
        if init != 'previous':
            # initial_centroids = KMPP_choose_centroids(data,self.M)
            initial_centroids = np.argsort(np.random.rand(B))[:self.M]
            if self.nature == 'Bernoulli':
                self.weights += init_bias * (data[initial_centroids] - 0.5)
            elif self.nature == 'Spin':
                self.weights += 0.25 * init_bias * data[initial_centroids]
            elif self.nature == 'Potts':
                self.weights += init_bias * \
                    binarize(data[initial_centroids], self.n_c) - \
                    init_bias / self.n_c

            if self.nature == 'Bernoulli':
                self.muvh = np.ones(
                    [self.M, self.N], dtype=curr_float) / (2.0 * self.M)
            elif self.nature == 'Spin':
                self.muvh = np.zeros([self.M, self.N], dtype=curr_float)
            else:
                self.muvh = np.ones(
                    [self.M, self.N, self.n_c], dtype=curr_float) / (self.n_c * self.M)
        else:
            if not hasattr(self, 'muvh'):
                if self.nature == 'Potts':
                    self.muvh = self.cond_muv * \
                        self.muh[:, np.newaxis, np.newaxis]
                else:
                    self.muvh = self.cond_muv * self.muh[:, np.newaxis]

        if shuffle_data:
            if weights is not None:
                permute = np.arange(data.shape[0])
                self.random_state.shuffle(permute)
                weights = weights[permute]
                data = data[permute, :]
            else:
                self.random_state.shuffle(data)
        if verbose:
            print('Epoch 0: Lik = %.4f' % (utilities.average(
                self.likelihood(data), weights=weights) / self.N))

        for epoch in range(0, n_iter + 1):
            if verbose:
                begin = time.time()
                print('Starting epoch %s' % epoch)

            if epoch == 0:
                update = False
            else:
                update = True

            if lr_decay:
                if (epoch > start_decay):
                    self.learning_rate *= decay_gamma

            for batch_slice in batch_slices:
                if weights is None:
                    data_mini = data[batch_slice]
                    weights_mini = None
                else:
                    data_mini = data[batch_slice]
                    weights_mini = weights[batch_slice]
                self.minibatch_fit(
                    data_mini, weights=weights_mini, update=update)
            if verbose:
                t = time.time() - begin
                if epoch % print_every == 0:
                    print('Finished epoch %s: time =%.2f s, Lik = %.4f' % (
                        epoch, t, utilities.average(self.likelihood(data), weights=weights) / self.N))

            if shuffle_data:
                if weights is not None:
                    permute = np.arange(data.shape[0])
                    self.random_state.shuffle(permute)
                    weights = weights[permute]
                    data = data[permute, :]
                else:
                    self.random_state.shuffle(data)
Example #18
    def print_status(self):
        print('Queue has been running for ' + str(self.time) + ' minutes')
        print('Successfully placed ' + str(self.successes * 12) + ' players.')
        print('Failed to place ' + str(len(self.waiting_room)) + ' players.')
        print('Skipped ' + str(
            len([
                player
                for player in self.waiting_room if not player.tested
            ])) + ' players')

        def print_wait_times(wait_times, string):
            print('Number of ' + string + ' in queue: ' +
                  str(len(wait_times)) + ' player.')
            print('Average current ' + string + ' wait time: ' +
                  str(average(wait_times)) + ' minutes')
            print('Median current ' + string + ' wait time: ' +
                  str(median(wait_times)) + ' minutes')
            print('Modal current ' + string + ' wait time: ' +
                  str(mode(wait_times)) + ' minutes')
            print('Max current ' + string + ' wait time: ' +
                  str(max(wait_times)) + ' minutes')

        def print_ranks(player_list):
            bronze = [
                player for player in player_list
                if player.get_rank() == 'BRONZE'
            ]
            silver = [
                player for player in player_list
                if player.get_rank() == 'SILVER'
            ]
            gold = [
                player for player in player_list if player.get_rank() == 'GOLD'
            ]
            platinum = [
                player for player in player_list
                if player.get_rank() == 'PLATINUM'
            ]
            diamond = [
                player for player in player_list
                if player.get_rank() == 'DIAMOND'
            ]
            master = [
                player for player in player_list
                if player.get_rank() == 'MASTER'
            ]
            GM = [
                player for player in player_list if player.get_rank() == 'GM'
            ]
            print('Bronze: ' + str(len(bronze)))
            print('Silver: ' + str(len(silver)))
            print('Gold: ' + str(len(gold)))
            print('Platinum: ' + str(len(platinum)))
            print('Diamond: ' + str(len(diamond)))
            print('Master: ' + str(len(master)))
            print('GM: ' + str(len(GM)))

        all_wait_times = [
            player.current_wait_time for player in self.waiting_room
        ]
        print_wait_times(all_wait_times, 'player')
        dps_queue = [
            player for player in self.waiting_room
            if 'DPS' in player.active_roles
        ]
        dps_wait_times = [player.current_wait_time for player in dps_queue]
        print_wait_times(dps_wait_times, 'DPS')
        print_ranks(dps_queue)
        tank_queue = [
            player for player in self.waiting_room
            if 'TANK' in player.active_roles
        ]
        tank_wait_times = [player.current_wait_time for player in tank_queue]
        print_wait_times(tank_wait_times, 'Tank')
        print_ranks(tank_queue)
        support_queue = [
            player for player in self.waiting_room
            if 'SUPPORT' in player.active_roles
        ]
        support_wait_times = [
            player.current_wait_time for player in support_queue
        ]
        print_wait_times(support_wait_times, 'Support')
        print_ranks(support_queue)
        bronze_wait_times = [
            player.current_wait_time for player in self.waiting_room
            if player.get_rank() == 'BRONZE'
        ]
        print_wait_times(bronze_wait_times, 'Bronze')
        silver_wait_times = [
            player.current_wait_time for player in self.waiting_room
            if player.get_rank() == 'SILVER'
        ]
        print_wait_times(silver_wait_times, 'Silver')
        gold_wait_times = [
            player.current_wait_time for player in self.waiting_room
            if player.get_rank() == 'GOLD'
        ]
        print_wait_times(gold_wait_times, 'Gold')
        platinum_wait_times = [
            player.current_wait_time for player in self.waiting_room
            if player.get_rank() == 'PLATINUM'
        ]
        print_wait_times(platinum_wait_times, 'Platinum')
        diamond_wait_times = [
            player.current_wait_time for player in self.waiting_room
            if player.get_rank() == 'DIAMOND'
        ]
        print_wait_times(diamond_wait_times, 'Diamond')
        master_wait_times = [
            player.current_wait_time for player in self.waiting_room
            if player.get_rank() == 'MASTER'
        ]
        print_wait_times(master_wait_times, 'Master')
        gm_wait_times = [
            player.current_wait_time for player in self.waiting_room
            if player.get_rank() == 'GM'
        ]
        print_wait_times(gm_wait_times, 'GM')
        game_SR_ranges = [game.SR_range for game in self.active_games]
        average_SR_range = average(game_SR_ranges)
        print('Average active game SR range: ' + str(average_SR_range) + ' SR')
        median_SR_range = sorted(game_SR_ranges)[len(game_SR_ranges) // 2]
        print('Median active game SR range: ' + str(median_SR_range) + ' SR')
        max_SR_range = max(game_SR_ranges)
        print('Max active game SR range: ' + str(max_SR_range) + ' SR')
Example #19
def get_cross_derivatives_ReLU(V_pos, psi_pos, hlayer, n_cv, weights=None):
    db_dw = average(V_pos, c=n_cv, weights=weights)
    a = hlayer.gamma[np.newaxis, :]
    theta = hlayer.delta[np.newaxis, :]
    b = hlayer.theta[np.newaxis, :]

    psi = psi_pos

    psi_plus = (-(psi - b) + theta) / np.sqrt(a)
    psi_minus = ((psi - b) + theta) / np.sqrt(a)

    Phi_plus = erf_times_gauss(psi_plus)
    Phi_minus = erf_times_gauss(psi_minus)

    p_plus = 1 / (1 + Phi_minus / Phi_plus)
    p_minus = 1 - p_plus

    e = (psi - b) - theta * (p_plus - p_minus)
    v = p_plus * p_minus * (2 * theta / np.sqrt(a)) * \
        (2 * theta / np.sqrt(a) - 1 / Phi_plus - 1 / Phi_minus)

    dpsi_plus_dpsi = -1 / np.sqrt(a)
    dpsi_minus_dpsi = 1 / np.sqrt(a)
    dpsi_plus_dtheta = 1 / np.sqrt(a)
    dpsi_minus_dtheta = 1 / np.sqrt(a)
    dpsi_plus_da = -1.0 / (2 * a) * psi_plus
    dpsi_minus_da = -1.0 / (2 * a) * psi_minus

    d2psi_plus_dadpsi = 0.5 / np.sqrt(a**3)
    d2psi_plus_dthetadpsi = 0
    d2psi_minus_dadpsi = -0.5 / np.sqrt(a**3)
    d2psi_minus_dthetadpsi = 0

    dp_plus_dpsi = p_plus * p_minus * \
        ((psi_plus - 1 / Phi_plus) * dpsi_plus_dpsi -
         (psi_minus - 1 / Phi_minus) * dpsi_minus_dpsi)
    dp_plus_dtheta = p_plus * p_minus * \
        ((psi_plus - 1 / Phi_plus) * dpsi_plus_dtheta -
         (psi_minus - 1 / Phi_minus) * dpsi_minus_dtheta)
    dp_plus_da = p_plus * p_minus * \
        ((psi_plus - 1 / Phi_plus) * dpsi_plus_da -
         (psi_minus - 1 / Phi_minus) * dpsi_minus_da)

    d2p_plus_dpsi2 = -(p_plus - p_minus) * p_plus * p_minus * ((psi_plus - 1 / Phi_plus) * dpsi_plus_dpsi - (psi_minus - 1 / Phi_minus) * dpsi_minus_dpsi)**2 \
        + p_plus * p_minus * ((dpsi_plus_dpsi)**2 * (1 + (psi_plus - 1 / Phi_plus) / Phi_plus) - (
            dpsi_minus_dpsi)**2 * (1 + (psi_minus - 1 / Phi_minus) / Phi_minus))

    d2p_plus_dadpsi = -(p_plus - p_minus) * ((psi_plus - 1 / Phi_plus) * dpsi_plus_dpsi - (psi_minus - 1 / Phi_minus) * dpsi_minus_dpsi) * (dp_plus_da)\
        + p_plus * p_minus * ((dpsi_plus_dpsi * dpsi_plus_da) * (1 + (psi_plus - 1 / Phi_plus) / Phi_plus) - (dpsi_minus_dpsi * dpsi_minus_da) * (1 + (psi_minus - 1 / Phi_minus) / Phi_minus)
                              + (d2psi_plus_dadpsi) * (psi_plus - 1 / Phi_plus) - (d2psi_minus_dadpsi) * (psi_minus - 1 / Phi_minus))

    d2p_plus_dthetadpsi = -(p_plus - p_minus) * ((psi_plus - 1 / Phi_plus) * dpsi_plus_dpsi - (psi_minus - 1 / Phi_minus) * dpsi_minus_dpsi) * (dp_plus_dtheta)\
        + p_plus * p_minus * ((dpsi_plus_dpsi * dpsi_plus_dtheta) * (1 + (psi_plus - 1 / Phi_plus) / Phi_plus) - (dpsi_minus_dpsi * dpsi_minus_dtheta) * (1 + (psi_minus - 1 / Phi_minus) / Phi_minus)
                              + (d2psi_plus_dthetadpsi) * (psi_plus - 1 / Phi_plus) - (d2psi_minus_dthetadpsi) * (psi_minus - 1 / Phi_minus))

    # dlogZ_dpsi = (p_plus * (psi_plus - 1 / Phi_plus) * dpsi_plus_dpsi +
    #               p_minus * (psi_minus - 1 / Phi_minus) * dpsi_minus_dpsi)
    # dlogZ_dtheta = (p_plus * (psi_plus - 1 / Phi_plus) * dpsi_plus_dtheta +
    #                 p_minus * (psi_minus - 1 / Phi_minus) * dpsi_minus_dtheta)
    # dlogZ_da = (p_plus * (psi_plus - 1 / Phi_plus) * dpsi_plus_da +
    #             p_minus * (psi_minus - 1 / Phi_minus) * dpsi_minus_da)

    de_dpsi = (1 + v)
    de_db = -de_dpsi
    de_da = 2 * (-theta) * dp_plus_da
    de_dtheta = -(p_plus - p_minus) + 2 * (-theta) * dp_plus_dtheta

    dv_dpsi = 2 * (-theta) * d2p_plus_dpsi2
    dv_db = -dv_dpsi

    dv_da = 2 * (-theta) * d2p_plus_dadpsi

    dv_dtheta = -2 * dp_plus_dpsi \
        + 2 * (-theta) * d2p_plus_dthetadpsi

    var_e = average(e**2, weights=weights) - average(e, weights=weights)**2
    mean_v = average(v, weights=weights)

    dmean_v_da = average(dv_da, weights=weights)
    dmean_v_db = average(dv_db, weights=weights)
    dmean_v_dtheta = average(dv_dtheta, weights=weights)

    dvar_e_da = 2 * (
        average(e * de_da, weights=weights) -
        average(e, weights=weights) * average(de_da, weights=weights))
    dvar_e_db = 2 * (
        average(e * de_db, weights=weights) -
        average(e, weights=weights) * average(de_db, weights=weights))
    dvar_e_dtheta = 2 * (
        average(e * de_dtheta, weights=weights) -
        average(e, weights=weights) * average(de_dtheta, weights=weights))

    tmp = np.sqrt((1 + mean_v)**2 + 4 * var_e)
    da_db = (dvar_e_db + 0.5 * dmean_v_db * (1 + mean_v + tmp)) / \
        (tmp - dvar_e_da - 0.5 * dmean_v_da * (1 + mean_v + tmp))
    da_dtheta = (dvar_e_dtheta + 0.5 * dmean_v_dtheta *
                 (1 + mean_v + tmp)) / (tmp - dvar_e_da - 0.5 * dmean_v_da *
                                        (1 + mean_v + tmp))

    dmean_v_dw = average_product(dv_dpsi,
                                 V_pos,
                                 c1=1,
                                 c2=n_cv,
                                 weights=weights)

    if n_cv > 1:
        dvar_e_dw = 2 * (
            average_product(e * de_dpsi, V_pos, c1=1, c2=n_cv, weights=weights)
            - average(e, weights=weights)[:, np.newaxis, np.newaxis] *
            average_product(de_dpsi, V_pos, c1=1, c2=n_cv, weights=weights))
        da_dw = (dvar_e_dw + 0.5 * dmean_v_dw *
                 (1 + mean_v + tmp)[:, np.newaxis, np.newaxis]) / (
                     tmp - dvar_e_da - 0.5 * dmean_v_da *
                     (1 + mean_v + tmp))[:, np.newaxis, np.newaxis]

    else:
        dvar_e_dw = 2 * (
            average_product(e * de_dpsi, V_pos, c1=1, c2=1, weights=weights) -
            average(e, weights=weights)[:, np.newaxis] *
            average_product(de_dpsi, V_pos, c1=1, c2=1, weights=weights))
        da_dw = (dvar_e_dw + 0.5 * dmean_v_dw *
                 (1 + mean_v + tmp)[:, np.newaxis]) / (
                     tmp - dvar_e_da - 0.5 * dmean_v_da *
                     (1 + mean_v + tmp))[:, np.newaxis]

    return db_dw, da_db, da_dtheta, da_dw
Example #20
def get_cross_derivatives_dReLU(V_pos, psi_pos, hlayer, n_cv, weights=None):
    # a = 2.0/(1.0/hlayer.a_plus + 1.0/hlayer.a_minus)
    # eta = 0.5* (a/hlayer.a_plus - a/hlayer.a_minus)
    # theta = (1.-eta**2)/2. * (hlayer.theta_plus+hlayer.theta_minus)
    # b = (1.+eta)/2. * hlayer.theta_plus - (1.-eta)/2. * hlayer.theta_minus
    db_dw = average(V_pos, c=n_cv, weights=weights)
    a = hlayer.a[np.newaxis, :]
    eta = hlayer.eta[np.newaxis, :]
    theta = hlayer.theta[np.newaxis, :]
    b = hlayer.b[np.newaxis, :]

    psi = psi_pos

    psi_plus = (-np.sqrt(1 + eta) *
                (psi - b) + theta / np.sqrt(1 + eta)) / np.sqrt(a)
    psi_minus = (np.sqrt(1 - eta) *
                 (psi - b) + theta / np.sqrt(1 - eta)) / np.sqrt(a)

    Phi_plus = erf_times_gauss(psi_plus)
    Phi_minus = erf_times_gauss(psi_minus)

    Z = Phi_plus * np.sqrt(1 + eta) + Phi_minus * np.sqrt(1 - eta)

    p_plus = 1 / (1 + (Phi_minus * np.sqrt(1 - eta)) /
                  (Phi_plus * np.sqrt(1 + eta)))
    nans = np.isnan(p_plus)
    p_plus[nans] = 1.0 * (np.abs(psi_plus[nans]) > np.abs(psi_minus[nans]))
    p_minus = 1 - p_plus

    e = (psi - b) * (1 + eta * (p_plus - p_minus)) - theta * (
        p_plus - p_minus) + 2 * eta * np.sqrt(a) / Z
    v = eta * (p_plus - p_minus) + p_plus * p_minus * (
        2 * theta / np.sqrt(a) - 2 * eta * (psi - b) / np.sqrt(a)) * (
            2 * theta / np.sqrt(a) - 2 * eta *
            (psi - b) / np.sqrt(a) - np.sqrt(1 + eta) / Phi_plus -
            np.sqrt(1 - eta) / Phi_minus) - 2 * eta * e / (np.sqrt(a) * Z)

    dpsi_plus_dpsi = -np.sqrt((1 + eta) / a)
    dpsi_minus_dpsi = np.sqrt((1 - eta) / a)
    dpsi_plus_dtheta = 1 / np.sqrt(a * (1 + eta))
    dpsi_minus_dtheta = 1 / np.sqrt(a * (1 - eta))
    # dpsi_plus_da = -1.0/(2*a) * psi_plus
    # dpsi_minus_da = -1.0/(2*a) * psi_minus

    dpsi_plus_deta = -1.0 / (2 * np.sqrt(a * (1 + eta))) * ((psi - b) + theta /
                                                            (1 + eta))
    dpsi_minus_deta = -1.0 / (2 * np.sqrt(a *
                                          (1 - eta))) * ((psi - b) - theta /
                                                         (1 - eta))

    # d2psi_plus_dadpsi = 0.5 * np.sqrt((1+eta)/a**3 )
    d2psi_plus_dthetadpsi = 0
    d2psi_plus_detadpsi = -0.5 / np.sqrt((1 + eta) * a)
    # d2psi_minus_dadpsi = -0.5 * np.sqrt((1-eta)/a**3 )
    d2psi_minus_dthetadpsi = 0
    d2psi_minus_detadpsi = -0.5 / np.sqrt((1 - eta) * a)

    dp_plus_dpsi = p_plus * p_minus * (
        (psi_plus - 1 / Phi_plus) * dpsi_plus_dpsi -
        (psi_minus - 1 / Phi_minus) * dpsi_minus_dpsi)
    dp_plus_dtheta = p_plus * p_minus * (
        (psi_plus - 1 / Phi_plus) * dpsi_plus_dtheta -
        (psi_minus - 1 / Phi_minus) * dpsi_minus_dtheta)
    # dp_plus_da = p_plus * p_minus * ( (psi_plus-1/Phi_plus) * dpsi_plus_da  - (psi_minus-1/Phi_minus) * dpsi_minus_da )
    dp_plus_deta = p_plus * p_minus * (
        (psi_plus - 1 / Phi_plus) * dpsi_plus_deta -
        (psi_minus - 1 / Phi_minus) * dpsi_minus_deta + 1 / (1 - eta**2))


    d2p_plus_dpsi2 = -(p_plus-p_minus) * p_plus * p_minus * ( (psi_plus-1/Phi_plus) * dpsi_plus_dpsi  - (psi_minus-1/Phi_minus) * dpsi_minus_dpsi )**2 \
    + p_plus * p_minus * ( (dpsi_plus_dpsi)**2 *  (1+ (psi_plus-1/Phi_plus)/Phi_plus) - (dpsi_minus_dpsi)**2 * (1+ (psi_minus-1/Phi_minus)/Phi_minus) )

    # d2p_plus_dadpsi = -(p_plus-p_minus) * ( (psi_plus-1/Phi_plus) * dpsi_plus_dpsi  - (psi_minus-1/Phi_minus) * dpsi_minus_dpsi ) * (dp_plus_da)\
    # + p_plus * p_minus * ( (dpsi_plus_dpsi* dpsi_plus_da) *  (1+ (psi_plus-1/Phi_plus)/Phi_plus) - (dpsi_minus_dpsi *dpsi_minus_da) * (1+ (psi_minus-1/Phi_minus)/Phi_minus) \
    # + (d2psi_plus_dadpsi) * (psi_plus-1/Phi_plus) - (d2psi_minus_dadpsi) * (psi_minus-1/Phi_minus) )

    d2p_plus_dthetadpsi = -(p_plus-p_minus) * ( (psi_plus-1/Phi_plus) * dpsi_plus_dpsi  - (psi_minus-1/Phi_minus) * dpsi_minus_dpsi ) * (dp_plus_dtheta)\
    + p_plus * p_minus * ( (dpsi_plus_dpsi* dpsi_plus_dtheta) *  (1+ (psi_plus-1/Phi_plus)/Phi_plus) - (dpsi_minus_dpsi *dpsi_minus_dtheta) * (1+ (psi_minus-1/Phi_minus)/Phi_minus) \
    + (d2psi_plus_dthetadpsi) * (psi_plus-1/Phi_plus) - (d2psi_minus_dthetadpsi) * (psi_minus-1/Phi_minus) )

    d2p_plus_detadpsi = -(p_plus-p_minus) * ( (psi_plus-1/Phi_plus) * dpsi_plus_dpsi  - (psi_minus-1/Phi_minus) * dpsi_minus_dpsi ) * (dp_plus_deta)\
    + p_plus * p_minus * ( (dpsi_plus_dpsi* dpsi_plus_deta) *  (1+ (psi_plus-1/Phi_plus)/Phi_plus) - (dpsi_minus_dpsi *dpsi_minus_deta) * (1+ (psi_minus-1/Phi_minus)/Phi_minus) \
    + (d2psi_plus_detadpsi) * (psi_plus-1/Phi_plus) - (d2psi_minus_detadpsi) * (psi_minus-1/Phi_minus) )

    dlogZ_dpsi = (p_plus * (psi_plus - 1 / Phi_plus) * dpsi_plus_dpsi +
                  p_minus * (psi_minus - 1 / Phi_minus) * dpsi_minus_dpsi)
    dlogZ_dtheta = (p_plus * (psi_plus - 1 / Phi_plus) * dpsi_plus_dtheta +
                    p_minus * (psi_minus - 1 / Phi_minus) * dpsi_minus_dtheta)
    # dlogZ_da = (p_plus * (psi_plus-1/Phi_plus)* dpsi_plus_da + p_minus * (psi_minus-1/Phi_minus) * dpsi_minus_da )
    dlogZ_deta = (p_plus * (psi_plus - 1 / Phi_plus) * dpsi_plus_deta +
                  p_minus * (psi_minus - 1 / Phi_minus) * dpsi_minus_deta +
                  0.5 * (p_plus / (1 + eta) - p_minus / (1 - eta)))

    de_dpsi = (1 + v)
    de_db = -de_dpsi
    # de_da = 2*((psi-b) * eta - theta) * dp_plus_da + eta/(Z*np.sqrt(a)) - 2*eta*np.sqrt(a)/Z * dlogZ_da
    de_dtheta = -(p_plus - p_minus) + 2 * (
        (psi - b) * eta -
        theta) * dp_plus_dtheta - 2 * eta * np.sqrt(a) / Z * dlogZ_dtheta
    de_deta = (psi - b) * (p_plus - p_minus) + 2 * (
        (psi - b) * eta - theta) * dp_plus_deta + 2 * np.sqrt(
            a) / Z - 2 * eta * np.sqrt(a) / Z * dlogZ_deta


    dv_dpsi = 4 * eta * dp_plus_dpsi\
    + 2*( (psi-b)*eta-theta) * d2p_plus_dpsi2 \
    - 2* eta/(np.sqrt(a)*Z) * ( de_dpsi - e*dlogZ_dpsi )

    dv_db = -dv_dpsi

    # dv_da = eta * 2 * dp_plus_da \
    # + 2 * ((psi-b)*eta - theta) * d2p_plus_dadpsi \
    # -2 * eta/(Z * np.sqrt(a)) * ( -e/(2*a) - e*dlogZ_da + de_da )

    dv_dtheta =  2 * eta *  dp_plus_dtheta \
    - 2 * dp_plus_dpsi \
    + 2 * ((psi-b)*eta - theta) * d2p_plus_dthetadpsi \
    -2 * eta/(Z * np.sqrt(a)) * ( - e*dlogZ_dtheta + de_dtheta )

    dv_deta = (p_plus-p_minus) \
    + 2 * eta * dp_plus_deta \
    + 2 * (psi-b) * dp_plus_dpsi \
    + 2 * ((psi-b)*eta - theta) * d2p_plus_detadpsi \
    -2 * 1/(Z * np.sqrt(a)) * (e - e*eta*dlogZ_deta + eta*de_deta )

    var_e = average(e**2, weights=weights) - average(e, weights=weights)**2
    mean_v = average(v, weights=weights)

    # dmean_v_da = average(dv_da,weights=weights)
    dmean_v_db = average(dv_db, weights=weights)
    dmean_v_dtheta = average(dv_dtheta, weights=weights)
    dmean_v_deta = average(dv_deta, weights=weights)

    # dvar_e_da = 2* (average(e*de_da,weights=weights) -average(e,weights=weights) * average(de_da,weights=weights) )
    dvar_e_db = 2 * (
        average(e * de_db, weights=weights) -
        average(e, weights=weights) * average(de_db, weights=weights))
    dvar_e_dtheta = 2 * (
        average(e * de_dtheta, weights=weights) -
        average(e, weights=weights) * average(de_dtheta, weights=weights))
    dvar_e_deta = 2 * (
        average(e * de_deta, weights=weights) -
        average(e, weights=weights) * average(de_deta, weights=weights))

    tmp = np.sqrt((1 + mean_v)**2 + 4 * var_e)
    denominator = tmp
    # denominator = (tmp - dvar_e_da- 0.5 * dmean_v_da * (1+mean_v+tmp))
    # denominator = np.maximum( denominator, 0.5) # For numerical stability.

    da_db = (dvar_e_db + 0.5 * dmean_v_db * (1 + mean_v + tmp)) / denominator
    da_dtheta = (dvar_e_dtheta + 0.5 * dmean_v_dtheta *
                 (1 + mean_v + tmp)) / denominator
    da_deta = (dvar_e_deta + 0.5 * dmean_v_deta *
               (1 + mean_v + tmp)) / denominator

    dmean_v_dw = average_product(dv_dpsi,
                                 V_pos,
                                 c1=1,
                                 c2=n_cv,
                                 weights=weights)

    if n_cv > 1:
        dvar_e_dw = 2 * (
            average_product(e * de_dpsi, V_pos, c1=1, c2=n_cv, weights=weights)
            - average(e, weights=weights)[:, np.newaxis, np.newaxis] *
            average_product(de_dpsi, V_pos, c1=1, c2=n_cv, weights=weights))
        da_dw = (dvar_e_dw + 0.5 * dmean_v_dw *
                 (1 + mean_v + tmp)[:, np.newaxis, np.newaxis]
                 ) / denominator[:, np.newaxis, np.newaxis]

    else:
        dvar_e_dw = 2 * (
            average_product(e * de_dpsi, V_pos, c1=1, c2=1, weights=weights) -
            average(e, weights=weights)[:, np.newaxis] *
            average_product(de_dpsi, V_pos, c1=1, c2=1, weights=weights))
        da_dw = (dvar_e_dw + 0.5 * dmean_v_dw *
                 (1 + mean_v + tmp)[:, np.newaxis]) / denominator[:,
                                                                  np.newaxis]

    return db_dw, da_db, da_dtheta, da_deta, da_dw
Example #21
def get_cross_derivatives_ReLU_plus(V_pos,
                                    psi_pos,
                                    hlayer,
                                    n_cv,
                                    weights=None):
    db_dw = average(V_pos, c=n_cv, weights=weights)

    a = hlayer.gamma[np.newaxis, :]
    b = hlayer.theta[np.newaxis, :]

    psi = psi_pos
    psi_plus = -(psi - b) / np.sqrt(a)

    Phi_plus = erf_times_gauss(psi_plus)

    e = (psi - b) + np.sqrt(a) / Phi_plus
    v = (psi_plus - 1 / Phi_plus) / Phi_plus

    dpsi_plus_dpsi = -1 / np.sqrt(a)
    dpsi_plus_da = -1.0 / (2 * a) * psi_plus

    de_dpsi = 1 + v
    de_db = -de_dpsi
    de_da = np.sqrt(a) * (1.0 / (2 * a * Phi_plus) -
                          (psi_plus - 1 / Phi_plus) / Phi_plus * dpsi_plus_da)

    dv_dpsi = dpsi_plus_dpsi * \
        (1 + psi_plus / Phi_plus - 1 / Phi_plus **
         2 - (psi_plus - 1 / Phi_plus)**2) / Phi_plus
    dv_db = -dv_dpsi
    dv_da = dpsi_plus_da * (1 + psi_plus / Phi_plus - 1 / Phi_plus**2 -
                            (psi_plus - 1 / Phi_plus)**2) / Phi_plus

    var_e = average(e**2, weights=weights) - average(e, weights=weights)**2
    mean_v = average(v, weights=weights)

    dmean_v_da = average(dv_da, weights=weights)
    dmean_v_db = average(dv_db, weights=weights)

    dvar_e_da = 2 * (
        average(e * de_da, weights=weights) -
        average(e, weights=weights) * average(de_da, weights=weights))
    dvar_e_db = 2 * (
        average(e * de_db, weights=weights) -
        average(e, weights=weights) * average(de_db, weights=weights))

    tmp = np.sqrt((1 + mean_v)**2 + 4 * var_e)
    denominator = (tmp - dvar_e_da - 0.5 * dmean_v_da * (1 + mean_v + tmp))
    denominator = np.maximum(denominator, 0.5)  # For numerical stability.

    da_db = (dvar_e_db + 0.5 * dmean_v_db * (1 + mean_v + tmp)) / denominator

    dmean_v_dw = average_product(dv_dpsi,
                                 V_pos,
                                 c1=1,
                                 c2=n_cv,
                                 weights=weights)

    if n_cv > 1:
        dvar_e_dw = 2 * (
            average_product(e * de_dpsi, V_pos, c1=1, c2=n_cv, weights=weights)
            - average(e, weights=weights)[:, np.newaxis, np.newaxis] *
            average_product(de_dpsi, V_pos, c1=1, c2=n_cv, weights=weights))
        da_dw = (dvar_e_dw + 0.5 * dmean_v_dw *
                 (1 + mean_v + tmp)[:, np.newaxis, np.newaxis]
                 ) / denominator[:, np.newaxis, np.newaxis]

    else:
        dvar_e_dw = 2 * (
            average_product(e * de_dpsi, V_pos, c1=1, c2=1, weights=weights) -
            average(e, weights=weights)[:, np.newaxis] *
            average_product(de_dpsi, V_pos, c1=1, c2=1, weights=weights))
        da_dw = (dvar_e_dw + 0.5 * dmean_v_dw *
                 (1 + mean_v + tmp)[:, np.newaxis]) / denominator[:,
                                                                  np.newaxis]

    return db_dw, da_db, da_dw
Example No. 22
0
def assess_moment_matching(RBM,
                           data,
                           data_gen,
                           datah_gen=None,
                           weights=None,
                           weights_neg=None,
                           with_reg=True,
                           show=True):
    h_data = RBM.mean_hiddens(data)
    if datah_gen is not None:
        h_gen = datah_gen
    else:
        h_gen = RBM.mean_hiddens(data_gen)
    mu = utilities.average(data, c=RBM.n_cv, weights=weights)
    if datah_gen is not None:
        condmu_gen = RBM.mean_visibles(datah_gen)
        mu_gen = utilities.average(condmu_gen, weights=weights_neg)
    else:
        mu_gen = utilities.average(data_gen, c=RBM.n_cv, weights=weights_neg)

    mu_h = utilities.average(h_data, weights=weights)
    mu_h_gen = utilities.average(h_gen, weights=weights_neg)

    if RBM.n_cv > 1:
        cov_vh = utilities.average_product(
            h_data, data, c2=RBM.n_cv, weights=weights
        ) - mu[np.newaxis, :, :] * mu_h[:, np.newaxis, np.newaxis]
    else:
        cov_vh = utilities.average_product(
            h_data, data, c2=RBM.n_cv,
            weights=weights) - mu[np.newaxis, :] * mu_h[:, np.newaxis]

    if datah_gen is not None:
        if RBM.n_cv > 1:
            cov_vh_gen = utilities.average_product(
                datah_gen,
                condmu_gen,
                mean2=True,
                c2=RBM.n_cv,
                weights=weights_neg
            ) - mu_gen[np.newaxis, :, :] * mu_h_gen[:, np.newaxis, np.newaxis]
        else:
            cov_vh_gen = utilities.average_product(
                datah_gen,
                condmu_gen,
                mean2=True,
                c2=RBM.n_cv,
                weights=weights_neg
            ) - mu_gen[np.newaxis, :] * mu_h_gen[:, np.newaxis]
    else:
        if RBM.n_cv > 1:
            cov_vh_gen = utilities.average_product(
                h_gen, data_gen, c2=RBM.n_cv, weights=weights_neg
            ) - mu_gen[np.newaxis, :, :] * mu_h_gen[:, np.newaxis, np.newaxis]
        else:
            cov_vh_gen = utilities.average_product(
                h_gen, data_gen, c2=RBM.n_cv, weights=weights_neg
            ) - mu_gen[np.newaxis, :] * mu_h_gen[:, np.newaxis]

    if RBM.hidden == 'dReLU':
        I_data = RBM.vlayer.compute_output(data, RBM.weights)
        I_gen = RBM.vlayer.compute_output(data_gen, RBM.weights)

        mu_p_pos, mu_n_pos, mu2_p_pos, mu2_n_pos = RBM.hlayer.mean12_pm_from_inputs(
            I_data)
        mu_p_pos = utilities.average(mu_p_pos, weights=weights)
        mu_n_pos = utilities.average(mu_n_pos, weights=weights)
        mu2_p_pos = utilities.average(mu2_p_pos, weights=weights)
        mu2_n_pos = utilities.average(mu2_n_pos, weights=weights)

        mu_p_neg, mu_n_neg, mu2_p_neg, mu2_n_neg = RBM.hlayer.mean12_pm_from_inputs(
            I_gen)
        mu_p_neg = utilities.average(mu_p_neg, weights=weights_neg)
        mu_n_neg = utilities.average(mu_n_neg, weights=weights_neg)
        mu2_p_neg = utilities.average(mu2_p_neg, weights=weights_neg)
        mu2_n_neg = utilities.average(mu2_n_neg, weights=weights_neg)

        a = RBM.hlayer.gamma
        eta = RBM.hlayer.eta
        theta = RBM.hlayer.delta

        moment_theta = -mu_p_pos / np.sqrt(1 + eta) + mu_n_pos / np.sqrt(1 -
                                                                         eta)
        moment_theta_gen = -mu_p_neg / np.sqrt(1 + eta) + mu_n_neg / np.sqrt(
            1 - eta)
        moment_eta = 0.5 * a / (1 + eta)**2 * mu2_p_pos - 0.5 * a / (
            1 - eta)**2 * mu2_n_pos + theta / (
                2 * np.sqrt(1 + eta)**3) * mu_p_pos - theta / (
                    2 * np.sqrt(1 - eta)**3) * mu_n_pos
        moment_eta_gen = 0.5 * a / (1 + eta)**2 * mu2_p_neg - 0.5 * a / (
            1 - eta)**2 * mu2_n_neg + theta / (
                2 * np.sqrt(1 + eta)**3) * mu_p_neg - theta / (
                    2 * np.sqrt(1 - eta)**3) * mu_n_neg

        moment_theta *= -1
        moment_theta_gen *= -1
        moment_eta *= -1
        moment_eta_gen *= -1

    W = RBM.weights
    if with_reg:
        l2 = RBM.l2
        l1 = RBM.l1
        l1b = RBM.l1b
        l1c = RBM.l1c
        l1_custom = RBM.l1_custom
        l1b_custom = RBM.l1b_custom
        n_c2 = RBM.n_cv
        if l2 > 0:
            cov_vh_gen += l2 * W
        if l1 > 0:
            cov_vh_gen += l1 * np.sign(W)
        if l1b > 0:  # NOT SUPPORTED FOR POTTS
            if n_c2 > 1:  # Potts RBM.
                cov_vh_gen += l1b * np.sign(W) * np.abs(W).mean(-1).mean(
                    -1)[:, np.newaxis, np.newaxis]
            else:
                cov_vh_gen += l1b * np.sign(W) * (np.abs(W).sum(1))[:,
                                                                    np.newaxis]
        if l1c > 0:  # NOT SUPPORTED FOR POTTS
            cov_vh_gen += l1c * np.sign(W) * (
                (np.abs(W).sum(1))**2)[:, np.newaxis]

        if any([l1 > 0, l1b > 0, l1c > 0]):
            mask_cov = np.abs(W) > 1e-3
        else:
            mask_cov = np.ones(W.shape, dtype='bool')
    else:
        mask_cov = np.ones(W.shape, dtype='bool')

    if RBM.n_cv > 1:
        if RBM.n_cv == 21:
            list_aa = Proteins_utils.aa
        else:
            list_aa = Proteins_utils.aa[:-1]
        colors_template = np.array([
            matplotlib.colors.to_rgba(aa_color_scatter(letter))
            for letter in list_aa
        ])
        color = np.repeat(colors_template[np.newaxis, :, :],
                          data.shape[1],
                          axis=0).reshape([data.shape[1] * RBM.n_cv, 4])
    else:
        color = 'C0'

    s2 = 14

    if RBM.hidden == 'dReLU':
        fig, ax = plt.subplots(3, 2)
        fig.set_figheight(3 * 5)
        fig.set_figwidth(2 * 5)
    else:
        fig, ax = plt.subplots(2, 2)
        fig.set_figheight(2 * 5)
        fig.set_figwidth(2 * 5)
    clean_ax(ax[1, 1])

    ax_ = ax[0, 0]
    ax_.scatter(mu.flatten(), mu_gen.flatten(), c=color)
    ax_.plot([mu.min(), mu.max()], [mu.min(), mu.max()])
    ax_.set_xlabel(r'$<v_i>_d$', fontsize=s2)
    ax_.set_ylabel(r'$<v_i>_m$', fontsize=s2)
    r2_mu = np.corrcoef(mu.flatten(), mu_gen.flatten())[0, 1]**2
    error_mu = np.sqrt(((mu - mu_gen)**2 / (mu * (1 - mu) + 1e-4)).mean())
    mini = mu.min()
    maxi = mu.max()
    ax_.text(0.6 * maxi + 0.4 * mini,
             0.25 * maxi + 0.75 * mini,
             r'$R^2 = %.2f$' % r2_mu,
             fontsize=s2)
    ax_.text(0.6 * maxi + 0.4 * mini,
             0.35 * maxi + 0.65 * mini,
             r'$Err = %.2e$' % error_mu,
             fontsize=s2)
    ax_.set_title('Mean visibles', fontsize=s2)

    ax_ = ax[0, 1]
    ax_.scatter(mu_h, mu_h_gen)
    ax_.plot([mu_h.min(), mu_h.max()], [mu_h.min(), mu_h.max()])
    ax_.set_xlabel(r'$<h_\mu>_d$', fontsize=s2)
    ax_.set_ylabel(r'$<h_\mu>_m$', fontsize=s2)
    r2_muh = np.corrcoef(mu_h, mu_h_gen)[0, 1]**2
    error_muh = np.sqrt(((mu_h - mu_h_gen)**2).mean())
    mini = mu_h.min()
    maxi = mu_h.max()
    ax_.text(0.6 * maxi + 0.4 * mini,
             0.25 * maxi + 0.75 * mini,
             r'$R^2 = %.2f$' % r2_muh,
             fontsize=s2)
    ax_.text(0.6 * maxi + 0.4 * mini,
             0.35 * maxi + 0.65 * mini,
             r'$Err = %.2e$' % error_muh,
             fontsize=s2)
    ax_.set_title('Mean hiddens', fontsize=s2)

    ax_ = ax[1, 0]
    if RBM.n_cv > 1:
        color = np.repeat(np.repeat(colors_template[np.newaxis,
                                                    np.newaxis, :, :],
                                    RBM.n_h,
                                    axis=0),
                          data.shape[1],
                          axis=1).reshape([RBM.n_v * RBM.n_h * RBM.n_cv, 4])
        color = color[mask_cov.flatten()]
    else:
        color = 'C0'

    cov_vh = cov_vh[mask_cov].flatten()
    cov_vh_gen = cov_vh_gen[mask_cov].flatten()

    ax_.scatter(cov_vh, cov_vh_gen, c=color)
    ax_.plot([cov_vh.min(), cov_vh.max()], [cov_vh.min(), cov_vh.max()])
    ax_.set_xlabel(r'Cov$(v_i \;, h_\mu)_d$', fontsize=s2)
    ax_.set_ylabel(r'Cov$(v_i \;, h_\mu)_m + \nabla_{w_{\mu i}} \mathcal{R}$',
                   fontsize=s2)
    r2_vh = np.corrcoef(cov_vh, cov_vh_gen)[0, 1]**2
    error_vh = np.sqrt(((cov_vh - cov_vh_gen)**2).mean())
    mini = cov_vh.min()
    maxi = cov_vh.max()
    ax_.text(0.6 * maxi + 0.4 * mini,
             0.25 * maxi + 0.75 * mini,
             r'$R^2 = %.2f$' % r2_vh,
             fontsize=s2)
    ax_.text(0.6 * maxi + 0.4 * mini,
             0.35 * maxi + 0.65 * mini,
             r'$Err = %.2e$' % error_vh,
             fontsize=s2)
    ax_.set_title('Hiddens-Visibles correlations', fontsize=s2)

    if RBM.hidden == 'dReLU':
        ax_ = ax[2, 0]
        ax_.scatter(moment_theta, moment_theta_gen, c=theta)
        ax_.plot([moment_theta.min(), moment_theta.max()],
                 [moment_theta.min(), moment_theta.max()])
        ax_.set_xlabel(r'$<-\frac{\partial E}{\partial \theta}>_d$',
                       fontsize=s2)
        ax_.set_ylabel(r'$<-\frac{\partial E}{\partial \theta}>_m$',
                       fontsize=s2)
        r2_theta = np.corrcoef(moment_theta, moment_theta_gen)[0, 1]**2
        error_theta = np.sqrt(((moment_theta - moment_theta_gen)**2).mean())
        mini = moment_theta.min()
        maxi = moment_theta.max()
        ax_.text(0.6 * maxi + 0.4 * mini,
                 0.25 * maxi + 0.75 * mini,
                 r'$R^2 = %.2f$' % r2_theta,
                 fontsize=s2)
        ax_.text(0.6 * maxi + 0.4 * mini,
                 0.35 * maxi + 0.65 * mini,
                 r'$Err = %.2e$' % error_theta,
                 fontsize=s2)
        ax_.set_title('Moment theta', fontsize=s2)

        ax_ = ax[2, 1]
        ax_.scatter(moment_eta, moment_eta_gen, c=np.abs(eta))
        ax_.plot([moment_eta.min(), moment_eta.max()],
                 [moment_eta.min(), moment_eta.max()])
        ax_.set_xlabel(r'$<-\frac{\partial E}{\partial \eta}>_d$', fontsize=s2)
        ax_.set_ylabel(r'$<-\frac{\partial E}{\partial \eta}>_m$', fontsize=s2)
        r2_eta = np.corrcoef(moment_eta, moment_eta_gen)[0, 1]**2
        error_eta = np.sqrt(((moment_eta - moment_eta_gen)**2).mean())
        mini = moment_eta.min()
        maxi = moment_eta.max()
        ax_.text(0.6 * maxi + 0.4 * mini,
                 0.25 * maxi + 0.75 * mini,
                 r'$R^2 = %.2f$' % r2_eta,
                 fontsize=s2)
        ax_.text(0.6 * maxi + 0.4 * mini,
                 0.35 * maxi + 0.65 * mini,
                 r'$Err = %.2e$' % error_eta,
                 fontsize=s2)
        ax_.set_title('Moment eta', fontsize=s2)

    plt.tight_layout()
    if show:
        fig.show()

    if RBM.hidden == 'dReLU':
        errors = [error_mu, error_muh, error_vh, error_theta, error_eta]
        r2s = [r2_mu, r2_muh, r2_vh, r2_theta, r2_eta]
    else:
        errors = [error_mu, error_muh, error_vh]
        r2s = [r2_mu, r2_muh, r2_vh]

    return fig, errors, r2s
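A minimal usage sketch (hypothetical names: `RBM` is assumed to be an already-trained RBM object from this library and `data` its training set; the generated sample is drawn with `gen_data`, called as in the `calculate_error` example further below):

# Hypothetical: 'RBM' and 'data' are not defined in this excerpt.
data_gen, datah_gen = RBM.gen_data(Nchains=100, Lchains=1000, Nthermalize=500)
fig, errors, r2s = assess_moment_matching(RBM, data, data_gen,
                                          datah_gen=datah_gen, show=True)
print('Moment-matching errors:', errors)
print('R^2 values:', r2s)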
Example No. 23
0
    def fit(self,
            data,
            weights=None,
            pseudo_count=1e-4,
            verbose=1,
            zero_diag=True):

        fi = utilities.average(data, c=self.n_c, weights=weights)
        fij = utilities.average_product(data,
                                        data,
                                        c1=self.n_c,
                                        c2=self.n_c,
                                        weights=weights)
        for i in range(self.N):
            fij[i, i] = np.diag(fi[i])

        fi_PC = (1 - pseudo_count) * fi + pseudo_count / float(self.n_c)
        fij_PC = (1 - pseudo_count) * fij + pseudo_count / float(self.n_c)**2

        for i in range(self.N):
            fij_PC[i, i] = np.diag(fi_PC[i])

        Cij = fij_PC - fi_PC[
            np.newaxis, :, np.newaxis, :] * fi_PC[:, np.newaxis, :, np.newaxis]

        D = np.zeros([self.N, self.n_c - 1, self.n_c - 1])
        invD = np.zeros([self.N, self.n_c - 1, self.n_c - 1])

        for n in range(self.N):
            D[n] = scipy.linalg.sqrtm(Cij[n, n, :-1, :-1])
            invD[n] = np.linalg.inv(D[n])

        Gamma = np.zeros([self.N, self.n_c - 1, self.N, self.n_c - 1])
        for n1 in range(self.N):
            for n2 in range(self.N):
                Gamma[n1, :,
                      n2, :] = np.dot(invD[n1],
                                      np.dot(Cij[n1, n2, :-1, :-1], invD[n2]))

        Gamma_bin = Gamma.reshape(
            [self.N * (self.n_c - 1), self.N * (self.n_c - 1)])
        Gamma_bin = (Gamma_bin + Gamma_bin.T) / 2
        lam, v = np.linalg.eigh(Gamma_bin)
        order = np.argsort(lam)[::-1]

        v_ordered = np.rollaxis(
            v.reshape([self.N, self.n_c - 1, self.N * (self.n_c - 1)]), 2,
            0)[order, :, :]
        lam_ordered = lam[order]
        DeltaL = 0.5 * (lam_ordered - 1 - np.log(lam_ordered))
        xi = np.zeros(v_ordered.shape)
        for n in range(self.N):
            xi[:, n, :] = np.dot(v_ordered[:, n, :], invD[n])
        xi = np.sqrt(np.abs(1 - 1 / lam_ordered))[:, np.newaxis,
                                                  np.newaxis] * xi

        xi = np.concatenate(
            (xi, np.zeros([self.N * (self.n_c - 1), self.N, 1])),
            axis=2)  # Write in zero-sum gauge.
        xi -= xi.mean(-1)[:, :, np.newaxis]
        top_M_contrib = np.argsort(DeltaL)[::-1][:self.M]

        self.xi = xi[top_M_contrib]
        self.lam = lam_ordered[top_M_contrib]
        self.DeltaL = DeltaL[top_M_contrib]

        couplings = np.tensordot(
            self.xi[self.lam > 1], self.xi[self.lam > 1], axes=[
                (0), (0)
            ]) - np.tensordot(
                self.xi[self.lam < 1], self.xi[self.lam < 1], axes=[(0), (0)])
        couplings = np.asarray(np.swapaxes(couplings, 1, 2), order='c')
        if zero_diag:  # Zeroing the diagonal works much better in practice; the flag is kept for checking.
            for n in range(self.N):
                couplings[n, n, :, :] *= 0

        fields = np.log(fi_PC) - np.tensordot(
            couplings, fi_PC, axes=[(1, 3), (0, 1)])
        fields -= fields.mean(-1)[:, np.newaxis]

        self.layer.couplings = couplings
        self.layer.fields = fields
        if verbose:
            fig, ax = plt.subplots()
            ax2 = ax.twinx()
            ax.plot(self.DeltaL)
            ax2.semilogy(self.lam, c='red')
            ax.set_ylabel(r'$\Delta L$', color='blue')
            ax2.set_ylabel('Mode variance', color='red')
            for tl in ax.get_yticklabels():
                tl.set_color('blue')
            for tl in ax2.get_yticklabels():
                tl.set_color('red')
Example No. 24
0
and reuse them in any of our programs. Let us add our average
function to a utilities module:

utilities.py

def average(values):
    return sum(values) / len(values)

We have to import our custom modules manually

"""
import utilities

values = [4, 5, 6, 7]

print(utilities.average(values))  # Output 5.5
"""
You may also choose to specifically import the average function
"""

from utilities import average
print(average(values))  # Output 5.5
"""
With this syntax, you don't need to use dot notation when you
call the function. Because we've explicitly imported average()
in the import statement, we can call it by name.

You can also import a function under an alias to avoid name
collisions or to create a shorter name, as shown below.
"""
Example No. 25
0
def learn_mapping_to_alignment(alignment,
                               sequence,
                               hmmer_path=hmmer_path,
                               n_iter=3,
                               verbose=1):

    if not type(alignment) == str:  # data alignment.
        name_alignment = 'tmp.fasta'
        sequences_alignment = alignment
        Proteins_utils.write_FASTA('tmp.fasta', alignment)
    else:
        name_alignment = alignment
        sequences_alignment = Proteins_utils.load_FASTA(alignment,
                                                        drop_duplicates=False)
    sequences_alignment_original = sequences_alignment.copy()
    consensus_sequence = np.argmax(utilities.average(
        sequences_alignment_original, c=21)[:, :-1],
                                   axis=1)[np.newaxis, :]
    if type(sequence) == str:
        sequence_num = Proteins_utils.seq2num(sequence)
    else:
        sequence_num = sequence
        if sequence_num.ndim == 1:
            sequence_num = sequence_num[np.newaxis]

    Proteins_utils.write_FASTA('tmp_target.fasta', sequence_num)

    for iteration in range(1, n_iter + 1):
        hmm_alignment = 'tmp.hmm'
        if iteration > 1:
            cmd = hmmer_path + 'src/hmmbuild --symfrac 0 --wnone %s %s' % (
                hmm_alignment, name_alignment)
        else:
            cmd = hmmer_path + 'src/hmmbuild --symfrac 0 %s %s' % (
                hmm_alignment, name_alignment)
        os.system(cmd)
        cmd = hmmer_path + 'src/hmmalign -o tmp_aligned.txt %s %s' % (
            hmm_alignment, 'tmp_target.fasta')
        os.system(cmd)
        cmd = hmmer_path + 'easel/miniapps/esl-reformat --informat stockholm afa tmp_aligned.txt > tmp_aligned.fasta'
        os.system(cmd)
        sequence_aligned = ''.join(
            open('tmp_aligned.fasta', 'r').read().split('\n')[1:])
        if verbose:
            print('Iteration %s: %s' % (iteration, sequence_aligned))
        mapping_alignment_to_struct = []
        sequence_ref_aligned = []
        index_sequence = 0
        index_alignment = 0
        for k, s in enumerate(sequence_aligned):
            if s == '-':
                mapping_alignment_to_struct.append(-1)
                index_alignment += 1
                sequence_ref_aligned.append('-')
            elif s == s.upper():
                mapping_alignment_to_struct.append(index_sequence)
                index_sequence += 1
                index_alignment += 1
                sequence_ref_aligned.append(s)
            elif s == s.lower():
                index_sequence += 1

        mapping_alignment_to_struct = np.array(mapping_alignment_to_struct,
                                               dtype='int')
        print(len(sequence_ref_aligned))
        sequence_ref_aligned = Proteins_utils.seq2num(
            ''.join(sequence_ref_aligned))
        if verbose:
            fraction_of_sites = (mapping_alignment_to_struct != -1).mean()
            print(
                'Iteration %s, fraction of sites mapped on the structure: %.2f'
                % (iteration, fraction_of_sites))

        top_closest = np.minimum(50,
                                 sequences_alignment_original.shape[0] // 5)
        closest = np.argsort(
            (sequences_alignment_original == sequence_ref_aligned
             ).mean(1))[::-1][:top_closest]
        name_alignment = 'tmp.fasta'
        reduced_alignment = np.concatenate(
            (np.repeat(sequences_alignment_original[closest], 10,
                       axis=0), consensus_sequence),
            axis=0
        )  # Need to add the consensus sequence; otherwise hmmalign can remove a column if it has only gaps in the reduced alignment. Compensate by increasing the weight of the other sequences (repeating them) and removing the reweighting.
        Proteins_utils.write_FASTA('tmp.fasta', reduced_alignment)
    os.system(
        'rm tmp_target.fasta tmp_aligned.txt tmp_aligned.fasta tmp.hmm tmp.fasta'
    )
    return mapping_alignment_to_struct, sequence_ref_aligned
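A minimal usage sketch (hypothetical file name and target sequence; assumes Proteins_utils and a local HMMER installation reachable through hmmer_path, as above):

# Hypothetical inputs: a FASTA alignment of the family and one unaligned target sequence.
mapping, seq_aligned = learn_mapping_to_alignment('family_alignment.fasta',
                                                  'MKVLITGGAGFIGSNLVR',
                                                  n_iter=3, verbose=1)
# mapping[k] is the position of alignment column k in the target sequence (-1 for a gap).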
Example No. 26
0
def calculate_error(RBM,
                    data_tr,
                    N_sequences=800000,
                    Nstep=10,
                    background=None):
    N = RBM.n_v
    q = RBM.n_cv

    # Check how moments are reproduced
    # Means
    mudata = RBM.mu_data  # empirical averages
    #datav, datah = RBM.gen_data(Nchains = int(100), Lchains = int(N_sequences/100), Nthermalize=int(500), background= background)
    datav, datah = RBM.gen_data(Nchains=int(100),
                                Lchains=int(N_sequences / 100),
                                Nthermalize=int(500))
    mugen = utilities.average(datav, c=q, weights=None)

    # Correlations
    covgen = utilities.average_product(
        datav, datav, c1=q,
        c2=q) - mugen[:, np.newaxis, :, np.newaxis] * mugen[np.newaxis, :,
                                                            np.newaxis, :]
    covdata = utilities.average_product(
        data_tr, data_tr, c1=q,
        c2=q) - mudata[:, np.newaxis, :, np.newaxis] * mudata[np.newaxis, :,
                                                              np.newaxis, :]
    fdata = utilities.average_product(data_tr, data_tr, c1=q, c2=q)

    # Set the diagonal elements of the covariance to zero
    for i in range(N):
        covdata[i, i, :, :] = np.zeros((q, q))
        fdata[i, i, :, :] = np.zeros((q, q))
        covgen[i, i, :, :] = np.zeros((q, q))

    M = len(data_tr)
    maxp = float(1) / float(M)
    pp = 1
    ps = 0.00001  # pseudocount for fully conserved sites

    # error on frequency
    pp = 1
    errm = 0
    neffm = 0
    for i in range(N):
        for a in range(q):
            neffm += 1
            if mudata[i, a] < maxp:
                errm += np.power((mugen[i, a] - mudata[i, a]),
                                 2) / (float(1 - maxp) * float(maxp))
            else:
                if mudata[i, a] != 1.0:
                    errm += np.power(
                        (mugen[i, a] - mudata[i, a]),
                        2) / (float(1 - mudata[i, a]) * float(mudata[i, a]))
                else:
                    errm += np.power((mugen[i, a] - mudata[i, a]), 2) / (
                        float(1 - mudata[i, a] - ps) * float(mudata[i, a]))

    errmt = np.sqrt(float(1) / (float(neffm) * float(maxp)) * float(errm))
    # Rigorously, the regularization term should also appear in the difference errm!

    # error on correlations
    errc = 0
    neffc = 0
    for i in range(N):
        for j in range(i + 1, N):
            for a in range(q):
                for b in range(a + 1, q):
                    neffc += 1
                    if covdata[i, j, a, b] < maxp:
                        den = np.power(
                            np.sqrt(float(1 - maxp) * float(maxp)) +
                            mudata[i, a] * np.sqrt(mudata[j, b] *
                                                   (1 - mudata[j, b])) +
                            mudata[j, b] * np.sqrt(mudata[i, a] *
                                                   (1 - mudata[i, a])), 2)
                        errc += np.power(
                            (covgen[i, j, a, b] - covdata[i, j, a, b]),
                            2) / float(den)
                    else:
                        den = np.power(
                            np.sqrt(
                                float(1 - fdata[i, j, a, b]) *
                                float(fdata[i, j, a, b])) +
                            mudata[i, a] * np.sqrt(mudata[j, b] *
                                                   (1 - mudata[j, b])) +
                            mudata[j, b] * np.sqrt(mudata[i, a] *
                                                   (1 - mudata[i, a])), 2)
                        errc += np.power(
                            (covgen[i, j, a, b] - covdata[i, j, a, b]),
                            2) / float(den)

    errct = np.sqrt(float(1) / (float(neffc) * float(maxp)) * float(errc))
    return (errmt, errct)
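A minimal usage sketch (hypothetical names; `RBM` is assumed to be a trained Potts RBM from this library with `mu_data` already set, and `train_data` its training alignment):

# Both returned errors are normalized so that values around 1 are comparable
# to the expected finite-sampling noise of the training set.
err_freq, err_corr = calculate_error(RBM, train_data, N_sequences=100000)
print('Error on frequencies: %.2f, on pairwise correlations: %.2f' % (err_freq, err_corr))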
Example No. 27
0
    def fit(self,
            X,
            Y,
            weights=None,
            batch_size=100,
            learning_rate=None,
            lr_final=None,
            lr_decay=True,
            decay_after=0.5,
            extra_params=None,
            optimizer='ADAM',
            n_iter=10,
            verbose=1,
            regularizers=[]):

        self.batch_size = batch_size
        self.optimizer = optimizer
        self.n_iter = n_iter
        if self.n_iter <= 1:
            lr_decay = False
        if learning_rate is None:
            if self.optimizer == 'SGD':
                learning_rate = 0.01
            elif self.optimizer == 'ADAM':
                learning_rate = 5e-4
            else:
                print('Need to specify learning rate for optimizer.')
        if self.optimizer == 'ADAM':
            if extra_params is None:
                extra_params = [0.9, 0.99, 1e-3]
            self.beta1 = extra_params[0]
            self.beta2 = extra_params[1]
            self.epsilon = extra_params[2]
            if self.n_cout > 1:
                out0 = np.zeros([1, self.Nout, self.n_cout], dtype=curr_float)
            else:
                out0 = np.zeros([1, self.Nout], dtype=curr_float)

            grad = {
                'weights':
                np.zeros_like(self.weights),
                'output_layer':
                self.output_layer.internal_gradients(out0,
                                                     out0,
                                                     value='input',
                                                     value_neg='input')
            }

            for key in grad['output_layer'].keys():
                grad['output_layer'][key] *= 0

            self.gradient_moment1 = copy.deepcopy(grad)
            self.gradient_moment2 = copy.deepcopy(grad)

        self.learning_rate_init = copy.copy(learning_rate)
        self.learning_rate = learning_rate
        self.lr_decay = lr_decay
        if self.lr_decay:
            self.decay_after = decay_after
            self.start_decay = int(self.n_iter * self.decay_after)
            if lr_final is None:
                self.lr_final = 1e-2 * self.learning_rate
            else:
                self.lr_final = lr_final
            self.decay_gamma = (float(self.lr_final) /
                                float(self.learning_rate))**(
                                    1 / float(self.n_iter *
                                              (1 - self.decay_after)))
        else:
            self.decay_gamma = 1
        self.regularizers = regularizers

        n_samples = X.shape[0]
        n_batches = int(np.ceil(float(n_samples) / self.batch_size))
        batch_slices = list(
            utilities.gen_even_slices(n_batches * self.batch_size, n_batches,
                                      n_samples))

        X = np.asarray(X, dtype=self.input_layer.type, order='c')
        Y = np.asarray(Y, dtype=self.output_layer.type, order='c')
        if weights is not None:
            weights = weights.astype(curr_float)

        self.moments_Y = self.output_layer.get_moments(Y,
                                                       weights=weights,
                                                       value='data')
        self.moments_XY = utilities.average_product(X,
                                                    Y,
                                                    c1=self.n_cin,
                                                    c2=self.n_cout,
                                                    mean1=False,
                                                    mean2=False,
                                                    weights=weights)

        self.count_updates = 0

        for epoch in range(1, n_iter + 1):
            if verbose:
                begin = time.time()
            if self.lr_decay:
                if (epoch > self.start_decay):
                    self.learning_rate *= self.decay_gamma

            permutation = np.argsort(np.random.randn(n_samples))
            X = X[permutation, :]
            Y = Y[permutation, :]
            if weights is not None:
                weights = weights[permutation]

            if verbose:
                print('Starting epoch %s' % (epoch))
            for batch_slice in batch_slices:
                if weights is not None:
                    self.minibatch_fit(X[batch_slice],
                                       Y[batch_slice],
                                       weights=weights[batch_slice])
                else:
                    self.minibatch_fit(X[batch_slice],
                                       Y[batch_slice],
                                       weights=None)

            if verbose:
                end = time.time()
                lik = utilities.average(self.likelihood(X, Y), weights=weights)
                regularization = 0
                for regtype, regtarget, regvalue in self.regularizers:
                    if regtarget == 'weights':
                        target = self.weights
                    else:
                        target = self.output_layer.__dict__[regtarget]
                    if regtype == 'l1':
                        regularization += (regvalue * np.abs(target)).sum()
                    elif regtype == 'l2':
                        regularization += 0.5 * (regvalue * target**2).sum()
                    else:
                        print(regtype, 'not supported')
                # Normalize once, after summing all regularizer contributions.
                regularization /= self.Nout
                message = "Iteration %d, time = %.2fs, likelihood = %.2f, regularization  = %.2e, loss = %.2f" % (
                    epoch, end - begin, lik, regularization,
                    -lik + regularization)
                print(message)
        return 'done'
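A minimal usage sketch (hypothetical; the enclosing class is not shown in this excerpt, so `model`, `X_train` and `Y_train` are assumed to exist):

# Hypothetical: 'model' is an instance of the class defining this fit method.
model.fit(X_train, Y_train,
          batch_size=100, optimizer='ADAM', learning_rate=5e-4,
          n_iter=10, verbose=1)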