Example #1
 def cumsum(v,strict=False):
     if not strict:
         return np.cumsum(v,axis=0)
     else:
         out = np.zeros_like(v)
         out[1:] = np.cumsum(v[:-1],axis=0)
         return out
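A quick usage sketch (assuming numpy is imported as np, as the snippet implies): the strict flag switches between an inclusive and an exclusive prefix sum.

import numpy as np

v = np.array([1., 2., 3., 4.])
print(cumsum(v))               # [ 1.  3.  6. 10.]  inclusive prefix sum
print(cumsum(v, strict=True))  # [0. 1. 3. 6.]      exclusive: each entry excludes its own term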
Example #2
 def lagrangian(self, angles, omegas):
     y = np.cumsum(self.lengths * np.cos(angles))
     x_dot = np.cumsum( self.lengths * np.cos(angles) * omegas)
     y_dot = np.cumsum(-self.lengths * np.sin(angles) * omegas)
     V = np.sum(y * self.masses) * self.g
     T = 0.5 * np.sum(self.masses * (x_dot**2 + y_dot**2))
     return T - V
Example #3
    def transition_matrix(self):
        if self._transition_matrix is not None:
            return self._transition_matrix

        As, rs, ps = self.Ps, self.rs, self.ps

        # Fill in the transition matrix one block at a time
        K_total = self.total_num_states
        P = np.zeros((K_total, K_total))
        starts = np.concatenate(([0], np.cumsum(rs)[:-1]))
        ends = np.cumsum(rs)
        for (i, j), Aij in np.ndenumerate(As):
            block = P[starts[i]:ends[i], starts[j]:ends[j]]

            # Diagonal blocks (stay in sub-state or advance to next sub-state)
            if i == j:
                for k in range(rs[i]):
                    # p(z_{t+1} = (.,i+k) | z_t = (.,i)) = (1-p)^k p
                    # for 0 <= k <= r - i
                    block += (1 - ps[i])**k * ps[i] * np.diag(np.ones(rs[i]-k), k=k)

            # Off-diagonal blocks (exit to a new super state)
            else:
                # p(z_{t+1} = (j,1) | z_t = (k,i)) = (1-p_k)^{r_k-i+1} * A[k, j]
                block[:,0] = (1-ps[i]) ** np.arange(rs[i], 0, -1) * Aij

        assert np.allclose(P.sum(1),1)
        assert (0 <= P).all() and (P <= 1.).all()

        # Cache the transition matrix
        self._transition_matrix = P

        return P
Example #4
    def get_repaneled_airfoil(self, n_points_per_side=100):
        # Returns a repaneled version of the airfoil with cosine-spaced coordinates on the upper and lower surfaces.
        # Inputs:
        #   n_points_per_side is the number of points PER SIDE (upper and lower) of the airfoil. 100 is a good number.
        # Notes: The number of points defining the final airfoil will be n_points_per_side*2-1,
        # since one point (the leading edge point) is shared by both the upper and lower surfaces.

        upper_original_coors = self.upper_coordinates(
        )  # Note: includes leading edge point, be careful about duplicates
        lower_original_coors = self.lower_coordinates(
        )  # Note: includes leading edge point, be careful about duplicates

        # Find distances between coordinates, assuming linear interpolation
        upper_distances_between_points = np.sqrt(
            np.power(
                upper_original_coors[:-1, 0] -
                upper_original_coors[1:, 0], 2) +
            np.power(
                upper_original_coors[:-1, 1] - upper_original_coors[1:, 1], 2))
        lower_distances_between_points = np.sqrt(
            np.power(
                lower_original_coors[:-1, 0] -
                lower_original_coors[1:, 0], 2) +
            np.power(
                lower_original_coors[:-1, 1] - lower_original_coors[1:, 1], 2))
        upper_distances_from_TE = np.hstack(
            (0, np.cumsum(upper_distances_between_points)))
        lower_distances_from_LE = np.hstack(
            (0, np.cumsum(lower_distances_between_points)))
        upper_distances_from_TE_normalized = upper_distances_from_TE / upper_distances_from_TE[
            -1]
        lower_distances_from_LE_normalized = lower_distances_from_LE / lower_distances_from_LE[
            -1]

        # Generate a cosine-spaced list of points from 0 to 1
        s = cosspace(n_points=n_points_per_side)

        x_upper_func = sp_interp.PchipInterpolator(
            x=upper_distances_from_TE_normalized, y=upper_original_coors[:, 0])
        y_upper_func = sp_interp.PchipInterpolator(
            x=upper_distances_from_TE_normalized, y=upper_original_coors[:, 1])
        x_lower_func = sp_interp.PchipInterpolator(
            x=lower_distances_from_LE_normalized, y=lower_original_coors[:, 0])
        y_lower_func = sp_interp.PchipInterpolator(
            x=lower_distances_from_LE_normalized, y=lower_original_coors[:, 1])

        x_coors = np.hstack((x_upper_func(s), x_lower_func(s)[1:]))
        y_coors = np.hstack((y_upper_func(s), y_lower_func(s)[1:]))

        coordinates = np.column_stack((x_coors, y_coors))

        # Make a new airfoil with the coordinates
        name = self.name + ", repaneled to " + str(n_points_per_side) + " pts"
        new_airfoil = Airfoil(name=name,
                              coordinates=coordinates,
                              repanel=False)

        return new_airfoil
Example #5
def project_simplex_bounded(r, lb, ub):
    assert lb.sum() <= 1 and ub.sum() >= 1 and np.all(lb <= ub), 'not feasible'
    lambdas = np.append(lb - r, ub - r)
    idx = np.argsort(lambdas)
    lambdas = lambdas[idx]
    active = np.cumsum((idx < r.size) * 2 - 1)[:-1]
    diffs = np.diff(lambdas, n=1)
    totals = lb.sum() + np.cumsum(active * diffs)
    i = np.searchsorted(totals, 1.0)
    lam = (1 - totals[i]) / active[i] + lambdas[i + 1]
    return np.clip(r + lam, lb, ub)
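A small sanity-check sketch with made-up inputs: the projected vector stays inside the box [lb, ub] and sums to 1.

import numpy as np

r = np.array([0.7, 0.2, -0.1, 0.5])
lb = np.zeros(4)
ub = np.ones(4)
w = project_simplex_bounded(r, lb, ub)
print(w)        # approximately [0.567, 0.067, 0., 0.367]
print(w.sum())  # 1.0, with lb <= w <= ub elementwise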
Example #6
def flatten(value):
    """Flattens any nesting of tuples, arrays, or dicts.
       Returns 1D numpy array and an unflatten function.
       Doesn't preserve mixed numeric types (e.g. floats and ints).
       Assumes dict keys are sortable."""
    if isinstance(getval(value), np.ndarray):
        shape = value.shape

        def unflatten(vector):
            return np.reshape(vector, shape)

        return np.ravel(value), unflatten

    elif isinstance(getval(value), (float, int)):
        return np.array([value]), lambda x: x[0]

    elif isinstance(getval(value), (tuple, list)):
        constructor = type(getval(value))
        if not value:
            return np.array([]), lambda x: constructor()
        flat_pieces, unflatteners = zip(*map(flatten, value))
        split_indices = np.cumsum([len(vec) for vec in flat_pieces[:-1]])

        def unflatten(vector):
            pieces = np.split(vector, split_indices)
            return constructor(
                unflatten(v) for unflatten, v in zip(unflatteners, pieces))

        return np.concatenate(flat_pieces), unflatten

    elif isinstance(getval(value), dict):
        items = sorted(iteritems(value), key=itemgetter(0))
        keys, flat_pieces, unflatteners = zip(*[(k, ) + flatten(v)
                                                for k, v in items])
        split_indices = np.cumsum([len(vec) for vec in flat_pieces[:-1]])

        def unflatten(vector):
            pieces = np.split(vector, split_indices)
            return {
                key: unflattener(piece)
                for piece, unflattener, key in zip(pieces, unflatteners, keys)
            }

        return np.concatenate(flat_pieces), unflatten

    else:
        raise Exception("Don't know how to flatten type {}".format(
            type(value)))
Example #7
def simple_five_pop_demo(x=np.random.normal(size=30)):
    assert len(x) == 30
    # make all params positive
    x = np.exp(x)

    # # allow negative growth rates
    # for i in range(15,20):
    #     x[i] = np.log(x[i])
    # # make times increasing
    # for i in range(1,15):
    #     x[i] = x[i] + x[i-1]

    t = np.cumsum(x[:15])
    # allow negative growth rates
    g = np.log(x[15:20])

    model = momi.DemographicModel(1.0, .25)
    for pop in range(1, 6):
        model.add_leaf(pop)
    model.set_size(5, t[0], g=g[0])
    model.set_size(4, t[1], g=g[1])
    model.set_size(3, t[2], g=g[2])
    model.set_size(2, t[3], g=g[3])
    model.set_size(1, t[4], g=g[4])
    model.move_lineages(5, 4, t=t[5], N=x[20])
    model.set_size(3, t=t[6], N=x[21])
    model.set_size(2, t=t[7], N=x[22])
    model.set_size(1, t[8], N=x[23])
    model.move_lineages(4, 3, t[9], N=x[24])
    model.set_size(2, t[10], N=x[25])
    model.set_size(1, t[11], N=x[26])
    model.move_lineages(3, 2, t[12], N=x[27])
    model.set_size(1, t[13], N=x[28])
    model.move_lineages(2, 1, t[14], N=x[29])
    return model
Example #8
    def Fit(self, X, Y, **kwargs):
        self.cov = np.cov(Y.T)
        if not self.cov.shape:
            # you could be supplied with a 1-feature data set, in which case self.cov is just a number
            self.eigval = self.cov
            self.eigvec = np.eye(1)
            self.cov = self.cov.reshape(-1, 1)
        else:
            self.eigval, self.eigvec = np.linalg.eigh(self.cov)
            idx = self.eigval.argsort()[::-1]
            self.eigval = self.eigval[idx]
            self.eigvec = self.eigvec[:, idx]
            if self.percentage is not None:
                total_val = sum(self.eigval)
                running_fraction = np.cumsum(self.eigval) / total_val
                self.component = np.searchsorted(running_fraction,
                                                 self.percentage)
                if self.component == 0:
                    self.component = 1

            assert (self.component <= Y.shape[1]
                    ), "number of components cannot exceed number of variables"
            self.reconstruction_error = np.sum(
                self.eigval[self.component:]) / self.cov.shape[0]
            if self.reconstruction_error is None or np.isnan(
                    self.reconstruction_error):
                self.reconstruction_error = 0
            self.eigval = self.eigval[0:self.component]
            self.eigvec = self.eigvec[:, 0:self.component]
Example #9
    def _initialize_with_pca(self,
                             datas,
                             inputs=None,
                             masks=None,
                             tags=None,
                             num_iters=20):
        for data in datas:
            assert data.shape[1] == self.N

        N_offsets = np.cumsum(self.N_vec)[:-1]
        pcas = []

        split_datas = list(
            zip(*[np.split(data, N_offsets, axis=1) for data in datas]))
        split_masks = list(
            zip(*[np.split(mask, N_offsets, axis=1) for mask in masks]))
        assert len(split_masks) == len(split_datas) == self.P

        for em, dps, mps in zip(self.emissions_models, split_datas,
                                split_masks):
            pcas.append(em._initialize_with_pca(dps, inputs, mps, tags))

        # Combine the PCA objects
        from sklearn.decomposition import PCA
        pca = PCA(self.D)
        pca.components_ = block_diag(*[p.components_ for p in pcas])
        pca.mean_ = np.concatenate([p.mean_ for p in pcas])
        # Not super pleased with this, but it should work...
        pca.noise_variance_ = np.concatenate(
            [p.noise_variance_ * np.ones(n) for p, n in zip(pcas, self.N_vec)])
        return pca
Example #10
File: preprocessing.py Project: zhoupc/ssm
def pca_with_imputation(D, datas, masks, num_iters=20):
    if isinstance(datas, (list, tuple)) and isinstance(masks, (list, tuple)):
        data = np.concatenate(datas)
        mask = np.concatenate(masks)

    if np.any(~mask):
        # Fill in missing data with mean to start
        fulldata = data.copy()
        for n in range(fulldata.shape[1]):
            fulldata[~mask[:, n], n] = fulldata[mask[:, n], n].mean()

        for itr in range(num_iters):
            # Run PCA on imputed data
            pca = PCA(D)
            x = pca.fit_transform(fulldata)

            # Fill in missing data with PCA predictions
            pred = pca.inverse_transform(x)
            fulldata[~mask] = pred[~mask]
    else:
        pca = PCA(D)
        x = pca.fit_transform(data)

    # Unpack xs
    xs = np.split(x, np.cumsum([len(data) for data in datas])[:-1])
    assert len(xs) == len(datas)
    assert all([x.shape[0] == data.shape[0] for x, data in zip(xs, datas)])

    return pca, xs
Example #11
 def render(self):
     low, high = 0.1, 0.9
     angles, _ = self.state
     canvas = np.zeros((self.num_pix, self.num_pix)) + low
     radius = np.sum(self.lengths)
     joint_coords_x = np.cumsum(self.lengths * np.sin(angles)) / radius / 1.2
     joint_coords_y = np.cumsum(self.lengths * np.cos(angles)) / radius / 1.2
     joint_coords_x = np.concatenate((np.zeros(1), joint_coords_x))
     joint_coords_y = np.concatenate((np.zeros(1), joint_coords_y))
     joint_coords = np.concatenate((joint_coords_x[:, None],
                                    joint_coords_y[:, None]), axis=1)
     canvas_coords = array_meshgrid(self.num_pix)
     for point_A, point_B in zip(joint_coords[:-1], joint_coords[1:]):
         D = distance_to_segment(point_A, point_B, canvas_coords)
         canvas = np.maximum(canvas, high * np.exp(-((D/self.width)*20)**4))
     return canvas
Example #12
def inv_cdf_sampler(target, n=1, bounds=(-10, 10, 1000)):
    """
    random variable sampler using the interpolated inverse cdf method

    Args:

        target (callable) : the target density (may be unnormalized), evaluated pointwise on a grid

        n (int) : number of samples.

            must be either a positive integer or None.
            if n is a positive int, rvs returns an np.array of length n
            if n is None, rvs returns a scalar sample from the distribution

        bounds (tuple or list) : (lower bound, upper bound, number of ticks)

            [-10, 10, 10000] / (-10, 10, 10000)
            create 10000 ticks between -10 and 10


    Return:
        float or np.array([float]) of samples
    """
    ll = np.linspace(*bounds)

    cdf_data = np.cumsum(target(ll))*(ll[1]-ll[0])
    cdf_data /= cdf_data[-1]
    cdf_inv = sp.interpolate.interp1d(cdf_data, ll)
    return cdf_inv(np.random.uniform(size=n))
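A minimal usage sketch, assuming scipy is imported as sp (the sampler calls sp.interpolate.interp1d internally); here it draws from an unnormalized standard normal density.

import numpy as np
import scipy as sp
import scipy.interpolate

target = lambda x: np.exp(-0.5 * x**2)       # unnormalized standard normal density
samples = inv_cdf_sampler(target, n=10000)
print(samples.mean(), samples.std())         # roughly 0 and 1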
Example #13
 def unflatten(vector):
     split_ixs = np.cumsum(lengths)
     pieces = np.split(vector, split_ixs)
     return {key: unflattener(piece)
             for piece, unflattener, key in zip(pieces,
                                                unflatteners,
                                                keys)}
Example #14
    def initialize(self, x, u, **kwargs):
        localize = kwargs.get('localize', True)

        Ts = [_x.shape[0] for _x in x]
        if localize:
            from sklearn.cluster import KMeans
            km = KMeans(self.nb_states, random_state=1)
            km.fit((np.vstack(x)))
            zs = np.split(km.labels_, np.cumsum(Ts)[:-1])
            zs = [z[:-1] for z in zs]
        else:
            zs = [npr.choice(self.nb_states, size=T - 1) for T in Ts]

        _cov = np.zeros((self.nb_states, self.dm_obs, self.dm_obs))
        for k in range(self.nb_states):
            ts = [np.where(z == k)[0] for z in zs]
            xs = [
                np.hstack((_x[t, :], _u[t, :])) for t, _x, _u in zip(ts, x, u)
            ]
            ys = [_x[t + 1, :] for t, _x in zip(ts, x)]

            coef_, intercept_, sigma = linear_regression(xs, ys)
            self.A[k, ...] = coef_[:, :self.dm_obs]
            self.B[k, ...] = coef_[:, self.dm_obs:]
            self.c[k, :] = intercept_
            _cov[k, ...] = sigma

        self.cov = _cov
Example #15
    def get_downsampled_mcl(self, mcl_fractions):
        # Returns the mean camber line in downsampled form


        mcl = self.mcl_coordinates
        # Find distances along mcl, assuming linear interpolation
        mcl_distances_between_points = np.sqrt(
            np.power(mcl[:-1, 0] - mcl[1:, 0], 2) +
            np.power(mcl[:-1, 1] - mcl[1:, 1], 2)
        )
        mcl_distances_cumulative = np.hstack((0, np.cumsum(mcl_distances_between_points)))
        mcl_distances_cumulative_normalized = mcl_distances_cumulative / mcl_distances_cumulative[-1]

        mcl_downsampled_x = np.interp(
            x=mcl_fractions,
            xp=mcl_distances_cumulative_normalized,
            fp=mcl[:,0]
        )
        mcl_downsampled_y = np.interp(
            x=mcl_fractions,
            xp=mcl_distances_cumulative_normalized,
            fp=mcl[:, 1]
        )

        mcl_downsampled = np.column_stack((mcl_downsampled_x, mcl_downsampled_y))

        return mcl_downsampled
Example #16
 def forward(self, x, input, tag):
     assert x.shape[1] == self.D
     D_offsets = np.cumsum(self.D_vec)[:-1]
     datas = []
     for em, xp in zip(self.emissions_models, np.split(x, D_offsets, axis=1)):
         datas.append(em.forward(xp, input, tag))
     return np.concatenate(datas, axis=2)
Example #17
def main(argv):
  del argv  # Unused.

  x_scale = 0.1
  y_scale = 1.
  T = 50

  x_list = np.cumsum(x_scale * np.random.randn(T))
  y_list = np.array([x_list[t] + y_scale * np.random.randn() for t in range(T)])

  marginal = make_marginal_fn()
  marginal_grad = grad(lambda y_list, scales: marginal(y_list, *scales), 1)

  x_scale_est = 0.1
  y_scale_est = 1.
  step_size = 0.5 / T
  for i in range(100):
    t0 = time.time()
    x_scale_grad, y_scale_grad = marginal_grad(
        y_list, (x_scale_est, y_scale_est))
    x_scale_est *= np.exp(step_size * x_scale_est * x_scale_grad)
    y_scale_est *= np.exp(step_size * y_scale_est * y_scale_grad)
    print('{}\t{}\t{}\t{}\t{}'.format(
        time.time() - t0, i, marginal(y_list, x_scale_est, y_scale_est),
        x_scale_est, y_scale_est))
Example #18
    def initialize(self,
                   datas,
                   inputs=None,
                   masks=None,
                   tags=None,
                   init_method="random"):
        Ts = [data.shape[0] for data in datas]

        # Get initial discrete states
        if init_method.lower() == 'kmeans':
            # KMeans clustering
            from sklearn.cluster import KMeans
            km = KMeans(self.K)
            km.fit(np.vstack(datas))
            zs = np.split(km.labels_, np.cumsum(Ts)[:-1])

        elif init_method.lower() == 'random':
            # Random assignment
            zs = [npr.choice(self.K, size=T) for T in Ts]

        else:
            raise Exception(
                'Not an accepted initialization type: {}'.format(init_method))

        # Make a one-hot encoding of z and treat it as HMM expectations
        Ezs = [one_hot(z, self.K) for z in zs]
        expectations = [(Ez, None, None) for Ez in Ezs]

        # Set the variances all at once to use the setter
        self.m_step(expectations, datas, inputs, masks, tags)
Example #19
def to_diffable_arr(proba_KV, min_eps=MIN_EPS, do_force_safe=False):
    ''' Transform normalized topics to unconstrained space.

    Args
    ----
    proba_KV : 2D array, size K x V
        minimum value of any entry must be min_eps
        each row should sum to 1.0

    Returns
    -------
    reals_KVm1 : 2D array, size K x (V-1)
        unconstrained real values

    Examples
    --------
    >>> np.set_printoptions(precision=3)
    >>> V = 4
    >>> unif_1V = np.ones((1,V)) / float(V)
    >>> to_diffable_arr(unif_1V)
    array([[ 2.22e-16, -1.11e-16,  0.00e+00]])

    >>> rand_1V = np.asarray([[ 0.11, 0.22, 0.33, 0.20, 0.14 ]])
    >>> to_diffable_arr(rand_1V)
    array([[-0.704, -0.015,  0.663,  0.357]])

    '''
    assert proba_KV.ndim == 2
    K, V = proba_KV.shape
    offset_Vm1 = -1.0 * np.log(V - np.arange(1.0, V))

    cumsum_KV1m = np.maximum(1e-100, 1.0 - np.cumsum(proba_KV[:, :-1], axis=1))
    fracs_KV = np.hstack([proba_KV[:, :1], proba_KV[:, 1:] / cumsum_KV1m])
    reals_KVm1 = (inv_logistic_sigmoid(fracs_KV[:, :-1]) - offset_Vm1)
    return reals_KVm1
Example #20
def _simplex_projection(x):
    u = np.sort(x)[::-1]
    idcs = np.arange(1, u.shape[0] + 1)
    rho_nz = u + 1. / idcs * (1. - np.cumsum(u)) > 0
    rho = idcs[rho_nz].max()
    lmb = 1. / rho * (1. - u[:rho].sum())
    out = np.maximum(x + lmb, 0.)
    return out / out.sum()
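A usage sketch with a made-up input: the output is the Euclidean projection onto the probability simplex, i.e. a nonnegative vector summing to 1.

import numpy as np

x = np.array([0.4, -0.2, 1.1, 0.3])
w = _simplex_projection(x)
print(w)        # approximately [0.133, 0., 0.833, 0.033]
print(w.sum())  # 1.0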
Example #21
def get_e_log_cluster_probabilities_from_e_log_stick(e_log_v, e_log_1mv):
    zeros_shape = e_log_v.shape[0:-1] + (1,)

    e_log_stick_remain = np.concatenate([np.zeros(zeros_shape), \
                                        np.cumsum(e_log_1mv, axis = -1)], axis = -1)
    e_log_new_stick = np.concatenate((e_log_v, np.zeros(zeros_shape)), axis = -1)

    return (e_log_stick_remain + e_log_new_stick).squeeze()
Example #22
def rank_by_variance(X, q, var_percentage=0.8):
    if q is not None: return q

    [U, Σ, V] = np.linalg.svd(X, full_matrices=False)
    rank_sorted = np.cumsum(Σ) / np.sum(Σ)

    rank = np.sum(rank_sorted < var_percentage) + 1
    return rank
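A hypothetical usage sketch: the returned rank is the smallest number of leading singular values whose cumulative mass exceeds var_percentage, and an explicit q bypasses the SVD entirely.

import numpy as np

X = np.random.randn(200, 5) @ np.diag([5., 3., 1., 0.1, 0.01])
print(rank_by_variance(X, q=None, var_percentage=0.8))  # small rank capturing 80% of the singular-value mass
print(rank_by_variance(X, q=7))                         # 7: an explicit q is returned unchanged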
Example #23
File: tm.py Project: simonkamronn/autohmm
    def sample(self, n_samples=2000, observed_states=None, random_state=None):
        """Generate random samples from the self.

        Parameters
        ----------
        n : int
            Number of samples to generate.

        observed_states : array
            If provided, states are not sampled.

        random_state: RandomState or an int seed
            A random number generator instance. If None is given, the
            object's random_state is used

        Returns
        -------
        samples : array_like, length (``n_samples``)
                  List of samples

        states : array_like, shape (``n_samples``)
                 List of hidden states (accounting for tied states by giving
                 them the same index)
        """
        if random_state is None:
            random_state = self.random_state
        random_state = check_random_state(random_state)

        samples = np.zeros(n_samples)
        states = np.zeros(n_samples, dtype=int)

        if observed_states is None:
            startprob_pdf = np.exp(np.copy(self._log_startprob))
            startdist = stats.rv_discrete(name='custm',
                                      values=(np.arange(startprob_pdf.shape[0]),
                                                        startprob_pdf),
                                      seed=random_state)
            states[0] = startdist.rvs(size=1)[0]

            transmat_pdf = np.exp(np.copy(self._log_transmat))
            transmat_cdf = np.cumsum(transmat_pdf, 1)

            nrand = random_state.rand(n_samples)
            for idx in range(1,n_samples):
                newstate = (transmat_cdf[states[idx-1]] > nrand[idx-1]).argmax()
                states[idx] = newstate
        else:
            states = observed_states

        mu = np.copy(self._mu_)
        precision = np.copy(self._precision_)
        for idx in range(n_samples):
            mean_ = self._mu_[states[idx]]
            var_ = np.sqrt(1/precision[states[idx]])
            samples[idx] = norm.rvs(loc=mean_, scale=var_, size=1,
                                    random_state=random_state)
        states = self._process_sequence(states)
        return samples, states
Example #24
def stochastic_iterate_averaging(estimate, start):
    N = estimate.shape[0]
    if N - start <= 0:
        raise "Start of stationary distribution must be lower than number of iterates"

    window_lengths = np.reshape(np.arange(start, N) - start + 1, [-1, 1])
    estimate_iters = np.cumsum(estimate[start:, :], axis=0) / window_lengths
    estimate_mean = estimate_iters[-1]
    return (estimate_iters, estimate_mean)
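A usage sketch on synthetic iterates: each row of the first output is the running average from start up to that iterate, and the second output is the final average.

import numpy as np

iterates = np.random.randn(1000, 3) + np.array([1., 2., 3.])
running, final = stochastic_iterate_averaging(iterates, start=200)
print(running.shape)  # (800, 3)
print(final)          # close to [1., 2., 3.]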
Example #25
 def _invert(self, data, input, mask, tag):
     assert data.shape[1] == self.N
     N_offsets = np.cumsum(self.N_vec)[:-1]
     states = []
     for em, dp, mp in zip(self.emissions_models,
                         np.split(data, N_offsets, axis=1),
                         np.split(mask, N_offsets, axis=1)):
         states.append(em._invert(dp, input, mp, tag))
     return np.column_stack(states)
Example #26
def mpi_split(work_size, comm_size):
    base = work_size // comm_size
    leftover = int(work_size % comm_size)

    sizes = numpy.ones(comm_size, dtype=int) * base
    sizes[:leftover] += 1

    offsets = numpy.zeros(comm_size, dtype=int)
    offsets[1:] = numpy.cumsum(sizes)[:-1]

    return sizes, offsets
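A quick check (note this snippet refers to numpy directly rather than the np alias): splitting 10 work items across 3 ranks.

import numpy

sizes, offsets = mpi_split(work_size=10, comm_size=3)
print(sizes)    # [4 3 3]
print(offsets)  # [0 4 7]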
Example #27
def stick_forward(x_):
    x = x_.T
    # reverse cumsum
    x0 = x[:-1]
    s = np.cumsum(x0[::-1], 0)[::-1] + x[-1]
    z = x0 / s
    Km1 = x.shape[0] - 1
    k = np.arange(Km1)[(slice(None), ) + (None, ) * (x.ndim - 1)]
    eq_share = logit(1. / (Km1 + 1 - k))  # - np.log(Km1 - k)
    y = logit(z) - eq_share
    return y.T
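A usage sketch, assuming logit is scipy.special.logit (the snippet does not show where it is imported from): each row on the simplex maps to one fewer unconstrained coordinates.

import numpy as np
from scipy.special import logit

x = np.array([[0.2, 0.3, 0.5],
              [0.1, 0.1, 0.8]])  # rows on the 3-simplex
y = stick_forward(x)
print(y.shape)                   # (2, 2): K - 1 free coordinates per row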
Example #28
def resampling(w, rs):
    """
    Stratified resampling with "nograd_primitive" to ensure autograd 
    takes no derivatives through it.
    """
    N = w.shape[0]
    bins = np.cumsum(w)
    ind = np.arange(N)
    u = (ind + rs.rand(N)) / N

    return np.digitize(u, bins)
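A usage sketch, assuming rs is a numpy RandomState: each particle draws an ancestor index in proportion to the normalized weights.

import numpy as np
import numpy.random as npr

w = np.array([0.1, 0.2, 0.3, 0.4])   # normalized particle weights
ancestors = resampling(w, npr.RandomState(0))
print(ancestors)                     # indices in {0, ..., 3}; heavier weights appear more often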
Example #29
def flatten(value):
    """Flattens any nesting of tuples, arrays, or dicts.
       Returns 1D numpy array and an unflatten function.
       Doesn't preserve mixed numeric types (e.g. floats and ints).
       Assumes dict keys are sortable."""
    if isinstance(getval(value), np.ndarray):
        shape = value.shape
        def unflatten(vector):
            return np.reshape(vector, shape)
        return np.ravel(value), unflatten

    elif isinstance(getval(value), (float, int)):
        return np.array([value]), lambda x : x[0]

    elif isinstance(getval(value), (tuple, list)):
        constructor = type(getval(value))
        if not value:
            return np.array([]), lambda x : constructor()
        flat_pieces, unflatteners = zip(*map(flatten, value))
        split_indices = np.cumsum([len(vec) for vec in flat_pieces[:-1]])

        def unflatten(vector):
            pieces = np.split(vector, split_indices)
            return constructor(unflatten(v) for unflatten, v in zip(unflatteners, pieces))

        return np.concatenate(flat_pieces), unflatten

    elif isinstance(getval(value), dict):
        items = sorted(iteritems(value), key=itemgetter(0))
        keys, flat_pieces, unflatteners = zip(*[(k,) + flatten(v) for k, v in items])
        split_indices = np.cumsum([len(vec) for vec in flat_pieces[:-1]])

        def unflatten(vector):
            pieces = np.split(vector, split_indices)
            return {key: unflattener(piece)
                    for piece, unflattener, key in zip(pieces, unflatteners, keys)}

        return np.concatenate(flat_pieces), unflatten

    else:
        raise Exception("Don't know how to flatten type {}".format(type(value)))
Example #30
def get_e_num_large_clusters_from_ez(e_z,
                                    threshold = 0,
                                    n_samples = None,
                                    unif_samples = None):
    """
    Computes the expected number of clusters with at least t
    observations from cluster belongings e_z.
    Parameters
    ----------
    e_z : ndarray
        Array whose (n, k)th entry is the probability of the nth
        datapoint belonging to cluster k
    threshold : int
        Minimum number of observations a cluster needs in order to be counted.
    n_samples : int
        Number of Monte Carlo samples used to compute the expected
        number of clusters.
    unif_samples : ndarray, optional
        The user may pass in a precomputed array of uniform random variables
        on which the reparameterization trick is applied to compute the
        expected number of clusters.
    Returns
    -------
    (float, float)
        The mean and variance of the number of clusters with at least
        ``threshold`` observations in a dataset the same size as e_z
    """

    n_obs = e_z.shape[0]
    n_clusters = e_z.shape[1]

    # draw uniform samples
    if unif_samples is None:
        assert n_samples is not None
        unif_samples = np.random.random((n_obs, n_samples))

    else:
        assert unif_samples is not None
        assert unif_samples.shape[0] == n_obs

    n_samples = unif_samples.shape[1]
    e_z_cumsum = np.cumsum(e_z, axis = 1)

    num_heavy_clusters_vec = np.zeros(n_samples)

    # z_sample is a n_obs x n_samples matrix of cluster belongings
    z_sample = _get_clusters_from_ez_and_unif_samples(e_z_cumsum, unif_samples)

    for i in range(n_clusters):
        # get number of clusters with at least enough points above the threshold
        num_heavy_clusters_vec += np.sum(z_sample == i, axis = 0) > threshold

    return np.mean(num_heavy_clusters_vec), np.var(num_heavy_clusters_vec)
Example #31
def tau_update(e_z, alpha):
    k_approx = np.shape(e_z)[1]
    sum_e_z = np.sum(e_z, axis = 0)
    sum_e_z_upper = np.cumsum(sum_e_z[::-1])[::-1]

    #cum_sum_z = np.concatenate(([0.0], np.cumsum(sum_e_z)[:-2]))

    tau_update = np.zeros((k_approx - 1, 2))
    tau_update[:, 0] = sum_e_z[:-1] + 1
    tau_update[:, 1] = alpha + sum_e_z_upper[1:]

    return tau_update
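A usage sketch with made-up responsibilities: for K clusters the update returns a (K-1, 2) array of beta parameters, one row per stick in the stick-breaking representation.

import numpy as np

e_z = np.random.dirichlet(np.ones(4), size=100)  # soft assignments of 100 points to 4 clusters
print(tau_update(e_z, alpha=2.0).shape)          # (3, 2)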
Example #32
def simple_admixture_demo(x=np.random.normal(size=7)):
    t = np.cumsum(np.exp(x[:5]))
    p = 1.0 / (1.0 + np.exp(x[5:]))

    ret = momi.DemographicModel(1., .25)
    ret.add_leaf("b")
    ret.add_leaf("a")
    ret.move_lineages("a", 2, t[1], p=1. - p[1])
    ret.move_lineages("a", 3, t[0], p=1. - p[0])
    ret.move_lineages(2, 3, t[2])
    ret.move_lineages(3, "b", t[3])
    ret.move_lineages("a", "b", t[4])
    return ret
Example #33
def inds_to_effect_change(leverage, desired_delta):
    # Argsort sorts low to high.
    # We are removing points, so multiply by -1.
    sort_inds = np.argsort(leverage * np.sign(desired_delta))
    deltas = -1 * np.cumsum(leverage[sort_inds])
    change_sign_inds = np.argwhere(
        np.sign(desired_delta) * (desired_delta - deltas) <= 0.)
    if len(change_sign_inds) > 0:
        first_ind_change_sign = np.min(change_sign_inds)
        remove_inds = sort_inds[:(first_ind_change_sign + 1)]
        return remove_inds
    else:
        return None
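A worked sketch with a made-up leverage vector: the function returns the smallest prefix of points (in order of influence) whose removal moves the estimate by at least desired_delta, or None if no such prefix exists.

import numpy as np

leverage = np.array([0.5, -0.2, 1.0, 0.1, -0.4])
print(inds_to_effect_change(leverage, desired_delta=-1.2))  # [2 0]
print(inds_to_effect_change(leverage, desired_delta=-5.0))  # None: no subset is influential enough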
Example #34
def projectSimplex(mat):
    """ project each row vector to the simplex
    """
    nPoints, nVars = mat.shape
    mu = np.fliplr(np.sort(mat, axis=1))
    sum_hist = np.cumsum(mu, axis=1)
    flag = (mu - 1./np.tile(np.arange(1,nVars+1),(nPoints,1))*(sum_hist-1) > 0)
    
    f_flag = lambda flagPoint: len(flagPoint) - 1 - \
            flagPoint[::-1].argmax()
    # map() returns an iterator in Python 3, so materialize it before using it as an index
    lastTrue = np.array(list(map(f_flag, flag)))

    sm_row = sum_hist[np.arange(nPoints), lastTrue]

    theta = (sm_row - 1) * 1. / (lastTrue + 1.)

    w = np.maximum(mat - np.tile(theta, (nVars, 1)).T, 0.)

    return w
Example #35
def projectSimplex_vec(v):
    """ project vector v onto the probability simplex
    Parameter
    ---------
    v: shape(nVars,)
        input vector

    Returns
    -------
    w: shape(nVars,)
        projection of v onto the probability simplex
    """

    nVars = v.shape[0]
    mu = np.sort(v,kind='quicksort')[::-1]
    sm_hist = np.cumsum(mu)
    flag = (mu - 1./np.arange(1,nVars+1)*(sm_hist-1) > 0)
    
    lastTrue = len(flag) - 1 - flag[::-1].argmax()
    sm_row = sm_hist[lastTrue]
     
    theta = 1./(lastTrue+1) * (sm_row - 1)
    w = np.maximum(v-theta, 0.)
    return w
Example #36
 def fun(x): return to_scalar(np.cumsum(x))
 d_fun = lambda x : to_scalar(grad(fun)(x))
Example #37
def moving_average(a, n=10):
    ret = np.cumsum(a, dtype=float)
    ret[n:] = ret[n:] - ret[:-n]
    return ret[n - 1:] / n
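A quick check: each output entry is the mean of a trailing window of n consecutive inputs, so the result is n-1 elements shorter than the input.

import numpy as np

a = np.arange(20, dtype=float)
print(moving_average(a, n=5))  # [ 2.  3.  4. ... 17.], length 16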
Example #38
 def unpack_all_params(all_params):
     all_layer_params = np.array_split(all_params,np.cumsum(num_params_each_layer))
     return all_layer_params
Example #39
 def unpack_layer_params(params):
     gp_params = np.array_split(params, np.cumsum(num_params_each_output))
     return gp_params
Example #40
File: ar.py Project: jan-matthis/autohmm
    def sample(self, n_samples=2000, observed_states=None,
               init_samples=None, init_state=None, random_state=None):
        """Generate random samples from the self.

        Parameters
        ----------
        n : int
            Number of samples to generate.

        observed_states : array
            If provided, states are not sampled.

        random_state: RandomState or an int seed
            A random number generator instance. If None is given, the
            object's random_state is used

        init_state : int
            If provided, initial state is not sampled.

        init_samples : array, default: None
            If provided, initial samples (for AR) are not sampled.

        E : array-like, shape (n_samples, n_inputs)
            Feature matrix of individual inputs.

        Returns
        -------
        samples : array_like, length (``n_samples``)
                  List of samples

        states : array_like, shape (``n_samples``)
                 List of hidden states (accounting for tied states by giving
                 them the same index)
        """
        if random_state is None:
            random_state = self.random_state
        random_state = check_random_state(random_state)

        samples = np.zeros(n_samples)
        states = np.zeros(n_samples, dtype=int)

        order = self.n_lags

        if init_state is None:
            startprob_pdf = np.exp(np.copy(self._log_startprob))
            start_dist = stats.rv_discrete(name='custm',
                                      values=(np.arange(startprob_pdf.shape[0]),
                                                        startprob_pdf),
                                      seed=random_state)
            start_state = start_dist.rvs(size=1)[0]
        else:
            start_state = init_state

        if self.n_lags > 0:
            if init_samples is None:
                """
                n_init_samples = order + 10
                noise = np.sqrt(1.0/self._precision_[start_state]) * \
                        random_state.randn(n_init_samples)

                pad_after = n_init_samples - order - 1
                col = np.pad(1*self._alpha_[start_state, :], (1, pad_after),
                             mode='constant')
                row = np.zeros(n_init_samples)
                col[0] = row[0] = 1

                A = toeplitz(col, row)
                init_samples = np.dot(pinv(A), noise + self._mu_[start_state])
                # TODO: fix bug with n_lags > 1, blows up
                """
                init_samples = 0.01*np.ones((self.n_lags, self.n_features))  # temporary fix

        if observed_states is None:
            transmat_pdf = np.exp(np.copy(self._log_transmat))
            transmat_cdf = np.cumsum(transmat_pdf, 1)

            states[0] = (transmat_cdf[start_state] >
                         random_state.rand()).argmax()

            transmat_pdf = np.exp(self._log_transmat)
            transmat_cdf = np.cumsum(transmat_pdf, 1)

            nrand = random_state.rand(n_samples)
            for idx in range(1,n_samples):
                newstate = (transmat_cdf[states[idx-1]] > nrand[idx-1]).argmax()
                states[idx] = newstate
        else:
            states = observed_states

        precision = np.copy(self._precision_)
        for idx in range(n_samples):
            state_ = int(states[idx])
            var_ = np.sqrt(1/precision[state_])

            if self.n_lags == 0:
                mean_ = np.copy(self._mu_[state_])
            else:
                mean_ = np.copy(self._mu_[state_])

                for lag in range(1, order+1):
                    if idx < lag:
                        prev_ = init_samples[len(init_samples)-lag]
                    else:
                        prev_ = samples[idx-lag]
                    mean_ += np.copy(self._alpha_[state_, lag-1])*prev_

            samples[idx] = norm.rvs(loc=mean_, scale=var_, size=1,
                                    random_state=random_state)

        states = self._process_sequence(states)
        return samples, states
Example #41
        def sample(self, n_samples=2000, observed_states=None,
                   init_samples=None, init_state=None, random_state=None):
            """Generate random samples from the self.

            Parameters
            ----------
            n : int
                Number of samples to generate.

            observed_states : array
                If provided, states are not sampled.

            random_state: RandomState or an int seed
                A random number generator instance. If None is given, the
                object's random_state is used

            init_state : int
                If provided, initial state is not sampled.

            init_samples : array, default: None
                If provided, initial samples (for AR) are not sampled.

            E : array-like, shape (n_samples, n_inputs)
                Feature matrix of individual inputs.

            Returns
            -------
            samples : array_like, length (``n_samples``, ``n_features``)
                      List of samples

            states : array_like, shape (``n_samples``)
                     List of hidden states (accounting for tied states by giving
                     them the same index)
            """
            if random_state is None:
                random_state = self.random_state
            random_state = check_random_state(random_state)


            samples = np.zeros((n_samples, self.n_features))
            states = np.zeros(n_samples, dtype=int)

            order = self.n_lags

            if init_state is None:
                startprob_pdf = np.exp(np.copy(self._log_startprob))
                start_dist = stats.rv_discrete(name='custm',
                                          values=(np.arange(startprob_pdf.shape[0]),
                                                            startprob_pdf),
                                          seed=random_state)
                start_state = start_dist.rvs(size=1)[0]

            else:
                start_state = init_state

            if self.n_lags > 0:
                if init_samples is None:
                    init_samples = 0.01*np.ones((self.n_lags, self.n_features))  # TODO: better init

            if observed_states is None:
                transmat_pdf = np.exp(np.copy(self._log_transmat))
                transmat_cdf = np.cumsum(transmat_pdf, 1)

                states[0] = (transmat_cdf[start_state] >
                             random_state.rand()).argmax()

                transmat_pdf = np.exp(self._log_transmat)
                transmat_cdf = np.cumsum(transmat_pdf, 1)

                nrand = random_state.rand(n_samples)
                for idx in range(1,n_samples):
                    newstate = (transmat_cdf[states[idx-1]] > nrand[idx-1]).argmax()
                    states[idx] = newstate

            else:
                states = observed_states
            precision = np.copy(self._precision_)
            for idx in range(n_samples):
                state_ = int(states[idx])


                covar_ = np.linalg.inv(precision[state_])

                if self.n_lags == 0:
                    mean_ = np.copy(self._mu_[state_])
                else:
                    mean_ = np.copy(self._mu_[state_])

                    for lag in range(1, order+1):
                        if idx < lag:
                            prev_ = init_samples[len(init_samples)-lag]
                        else:
                            prev_ = samples[idx-lag]

                        mean_ += np.copy(self._alpha_[state_, lag-1])*prev_


                samples[idx] = self.multivariate_t_rvs(mean_, covar_,
                                                       random_state)

            states = self._process_sequence(states)

            return samples, states