Example #1
import numpy as np
import matplotlib.pyplot as plt
from pomegranate import (GeneralMixtureModel, NormalDistribution,
                         HalfNormalDistribution)  # HalfNormalDistribution requires a recent pomegranate 0.x

def plot_HalfNorm2():
    #X = np.random.normal(-2, 1, size=(1000, 1))  ## scipy runtime warning (possibly due to running outdated version)
    X = np.random.sample(size=(1000, 1))
    X[::2] += 4
    modeln = GeneralMixtureModel.from_samples(NormalDistribution, 2, X)
    modelh = GeneralMixtureModel.from_samples(HalfNormalDistribution, 2, X)
    x = np.arange(-15, 15, 0.1)
    fig, ax = plt.subplots(figsize=(7, 4))
    ax.plot(x, modeln.probability(x), label='Normal Mixture')
    ax.plot(x, set_y(x, modelh), label='Half Norm Mixture')
    ax.set_ylabel('Probability', fontsize=10)
    ax.legend(fontsize=10)
    plt.savefig('/scratch/chd5n/test.png', bbox_inches='tight')
    print('plot written to', '/scratch/chd5n/test.png')
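The helper set_y called above is not shown in this snippet; a minimal sketch consistent with how it is used (evaluating the half-normal mixture density over the plotting grid) could be:

def set_y(x, model):
    # hypothetical helper: evaluate the mixture density over the grid
    return model.probability(x)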
Example #2
    def oriHMMParams(self):
        """
        Set initial parameters for the Hidden Markov Model (HMM).
        
        Attributes
        ----------
        HMMParams : dict
            Has three keys: "A", the state transition matrix; "B", the
            emission parameters (means, variances, and weights) of the
            mixture Gaussian distributions for each hidden state; and
            "pi", the hidden state weights. This dict is updated after
            the learning procedure.
        """
        hmm = HiddenMarkovModel()
        # GMM emissions
        # 5 Hidden States:
        # 0--start, 1--downstream, 2--no bias, 3--upstream, 4--end
        numdists = 3  # Three-distribution Gaussian Mixtures
        var = 7.5 / (numdists - 1)
        means = [[], [], [], [], []]
        for i in range(numdists):
            means[4].append(i * 7.5 / (numdists - 1) + 2.5)
            means[3].append(i * 7.5 / (numdists - 1))
            means[2].append((i - (numdists - 1) / 2) * 7.5 / (numdists - 1))
            means[1].append(-i * 7.5 / (numdists - 1))
            means[0].append(-i * 7.5 / (numdists - 1) - 2.5)
        states = []
        for i, m in enumerate(means):
            tmp = []
            for j in m:
                tmp.append(NormalDistribution(j, var))
            mixture = GeneralMixtureModel(tmp)
            states.append(State(mixture, name=str(i)))
        hmm.add_states(*tuple(states))

        # Transition matrix
        #A = [[0., 1., 0., 0., 0.],
        #    [0., 0.4, 0.3, 0.3, 0.],
        #    [0.05, 0., 0.5, 0.45, 0.],
        #    [0., 0., 0., 0.5, 0.5],
        #    [0.99, 0., 0.01, 0., 0.]]
        hmm.add_transition(states[0], states[1], 1)
        hmm.add_transition(states[1], states[1], 0.4)
        hmm.add_transition(states[1], states[2], 0.3)
        hmm.add_transition(states[1], states[3], 0.3)
        hmm.add_transition(states[2], states[0], 0.05)
        hmm.add_transition(states[2], states[2], 0.5)
        hmm.add_transition(states[2], states[3], 0.45)
        hmm.add_transition(states[3], states[3], 0.5)
        hmm.add_transition(states[3], states[4], 0.5)
        hmm.add_transition(states[4], states[0], 0.99)
        hmm.add_transition(states[4], states[2], 0.01)

        pi = [0.05, 0.3, 0.3, 0.3, 0.05]
        for i in range(len(states)):
            hmm.add_transition(hmm.start, states[i], pi[i])

        hmm.bake()

        return hmm
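A minimal usage sketch, assuming the method lives on an instance caller and seq is a 1-D array of observations (both names are hypothetical):

hmm = caller.oriHMMParams()
logp, path = hmm.viterbi(seq)
state_names = [state.name for _, state in path[1:]]  # skip the start state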
Example #3
def load_segmentation_model(modeldata):
    model = HiddenMarkovModel('model')

    states = {}
    for s in modeldata:
        if len(s['emission']) == 1:
            emission = NormalDistribution(*s['emission'][0][:2])
        else:
            weights = np.array([w for _, _, w in s['emission']])
            dists = [NormalDistribution(mu, sigma)
                     for mu, sigma, _ in s['emission']]
            emission = GeneralMixtureModel(dists, weights=weights)
        state = State(emission, name=s['name'])

        states[s['name']] = state
        model.add_state(state)
        if 'start_prob' in s:
            model.add_transition(model.start, state, s['start_prob'])

    for s in modeldata:
        current = states[s['name']]
        for nextstate, prob in s['transition']:
            model.add_transition(current, states[nextstate], prob)

    model.bake()

    return model
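The expected layout of modeldata can be read off the field accesses above; a hypothetical two-state configuration:

modeldata = [
    {'name': 'low',
     'emission': [(0.0, 1.0, 1.0)],  # one (mu, sigma, weight): plain Normal
     'transition': [('low', 0.9), ('high', 0.1)],
     'start_prob': 0.5},
    {'name': 'high',
     'emission': [(3.0, 1.0, 0.6), (6.0, 2.0, 0.4)],  # two-component mixture
     'transition': [('low', 0.2), ('high', 0.8)],
     'start_prob': 0.5},
]
model = load_segmentation_model(modeldata)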
Example #4
    def __init__(self, amps, funcs, limits=(d.min_x, d.max_x)):
        """
        Object to define a mixture probability distribution

        Parameters
        ----------
        amps: ndarray, float
            array with one relative amplitude per component
        funcs: list, chippr.gauss or chippr.discrete objects
            list of components
        limits: tuple or list or numpy.ndarray, float, optional
            minimum and maximum sample values to return
        """

        self.amps = amps/np.sum(amps)
        self.cumamps = np.cumsum(self.amps)
        self.n_comps = len(self.amps)

        self.funcs = funcs
        # each component is expected to expose its underlying pomegranate
        # distribution via a `.dist` attribute (chippr.gauss / chippr.discrete)
        self.funcs = [func.dist for func in self.funcs]

        self.dims = np.shape(np.array(limits).T)[0]
        self.min_x = limits[0]
        self.max_x = limits[1]
        # print("amps="+str(self.amps))
        self.dist = GMM(self.funcs, weights=self.amps)
Example #5
    def __init__(self, bin_ends, weights):
        """
        Binned function class for any discrete function

        Parameters
        ----------
        bin_ends: numpy.ndarray, float
            endpoints of bins
        weights: numpy.ndarray, float
            relative weights associated with each bin

        Notes
        -----
        TO DO: Rename to piecewise constant or somesuch
        """
        self.bin_ends = bin_ends
        self.dbins = self.bin_ends[1:] - self.bin_ends[:-1]
        self.n_bins = len(self.bin_ends)-1
        self.bin_range = range(self.n_bins)

        self.weights = weights
        self.normweights = np.cumsum(self.weights) / np.sum(self.weights)
        self.distweights = np.cumsum(self.weights) / np.dot(self.weights, self.dbins)

        self.funcs = [UD(self.bin_ends[i], self.bin_ends[i+1]) for i in self.bin_range]
        if self.n_bins > 1:
            self.dist = GMM(self.funcs, weights=self.weights)
        else:
            self.dist = self.funcs[0]
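UD and GMM are not defined in the snippet; they are presumably import aliases along these lines:

from pomegranate import GeneralMixtureModel as GMM
from pomegranate import UniformDistribution as UD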
Example #6
    def addData(self, data, score):
        score = score.clip(min=1e-5)
        self.data = data
        self.score = score

        score_normed = self.score / np.linalg.norm(self.score, ord=1)
        try:
            model = GeneralMixtureModel.from_samples(
                MultivariateGaussianDistribution,
                n_components=self.n_comp,
                X=self.data,
                weights=score_normed)
            self.model = model
        except Exception:
            logging.info("caught an exception")
Example #7
    def fit_mixture(self,
                    pos_left,
                    pos_right,
                    weights,
                    n_components=2,
                    tol=1e-4,
                    maxiter=4000,
                    verbose=False):
        left, right = np.asarray(pos_left), np.asarray(pos_right)
        weights = np.asarray(weights)
        debugs = list() if verbose else None
        centers = (left + right) / 2.0
        init_gmm = GeneralMixtureModel.from_samples(
            MultivariateGaussianDistribution,
            n_components=n_components,
            X=centers,
            weights=weights,
            stop_threshold=0.01,
            n_jobs=2)

        init_mus, init_covs = list(), list()
        init_comp_ws = np.array(init_gmm.weights)
        init_comp_ws /= np.sum(init_comp_ws)
        for i in range(n_components):
            paras = init_gmm.distributions[i].parameters
            init_mus.append(np.array(paras[0]))
            init_covs.append(np.array(paras[1]))

        init_paras = self._paras_compose_(init_mus, init_covs,
                                          list(init_comp_ws))

        method = 'Nelder-Mead'
        res = opt.minimize(self._mixture_optpara,
                           init_paras,
                           args=(left, right, weights, n_components, debugs),
                           method=method,
                           tol=tol,
                           options={
                               'maxiter': maxiter,
                               'disp': verbose
                           })
        if verbose:
            print("Method:{}; Initial parameter: {};".format(
                method, init_paras))
            print("Converged Parameter: {}".format(res.x))

        mus, covs, comp_ws = self._paras_decompose_(res.x, n_components)
        return mus, covs, comp_ws, res.fun
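The helpers _paras_compose_ and _paras_decompose_ are not shown; a hypothetical flatten/unflatten pair consistent with how they are used (packing means, covariances, and component weights into the flat vector scipy.optimize expects, assuming the data dimension d is known to the object):

def _paras_compose_(self, mus, covs, comp_ws):
    # hypothetical: concatenate means, flattened covariances, and weights
    parts = [np.ravel(m) for m in mus] + [np.ravel(c) for c in covs]
    parts.append(np.asarray(comp_ws))
    return np.concatenate(parts)

def _paras_decompose_(self, paras, n_components):
    # hypothetical inverse of _paras_compose_; d is the data dimension
    d = self._dim  # assumed attribute
    mus = [paras[i * d:(i + 1) * d] for i in range(n_components)]
    off = n_components * d
    covs = [paras[off + i * d * d:off + (i + 1) * d * d].reshape(d, d)
            for i in range(n_components)]
    comp_ws = paras[off + n_components * d * d:]
    return mus, covs, comp_ws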
Example #8
def ghmm_model(states_labels: tuple,
               transitions: tuple,
               init_prob: tuple,
               end_prob: tuple,
               means: list,
               vars: list) -> HiddenMarkovModel:
    """

    :param states_labels:
    :param transitions:
    :param init_prob:
    :param end_prob:
    :param means:
    :param vars:
    :return:
    """
    hmm_model = HiddenMarkovModel()

    mix_num = len(vars[0])
    states = []
    for state_i, state in enumerate(states_labels):
        mixture = []
        for mix_i in range(mix_num):
            init_mean = means[state_i][mix_i]
            init_var = vars[state_i][mix_i]
            mixture.append(NormalDistribution(init_mean, init_var))
        states.append(State(GeneralMixtureModel(mixture), name=str(state_i)))
    hmm_model.add_states(*tuple(states))

    for row in range(len(states_labels)):
        for col in range(len(states_labels)):
            prob = transitions[row][col]
            if prob != 0.:
                hmm_model.add_transition(states[row], states[col], prob)
    for state_i, prob in enumerate(init_prob):
        if prob != 0.:
            hmm_model.add_transition(hmm_model.start, states[state_i], prob)
    for state_i, prob in enumerate(end_prob):
        if prob != 0.:
            hmm_model.add_transition(states[state_i], hmm_model.end, prob)

    hmm_model.bake()

    return hmm_model
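A hypothetical two-state, two-component call:

hmm = ghmm_model(states_labels=('s0', 's1'),
                 transitions=((0.8, 0.2), (0.3, 0.7)),
                 init_prob=(0.6, 0.4),
                 end_prob=(0.0, 0.0),
                 means=[[0.0, 1.0], [4.0, 5.0]],
                 vars=[[1.0, 1.0], [1.0, 1.0]])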
Example #9
    def oriHMMParams(self, numdists=3):
        """
        Set initial parameters for the Hidden Markov Model (HMM).
        
        """
        # GMM emissions
        # 3 Hidden States:
        # 0--downstream, 1--no bias, 2--upstream
        if numdists == 1:
            dists = [
                NormalDistribution(-2.5, 7.5),
                NormalDistribution(0, 7.5),
                NormalDistribution(2.5, 7.5)
            ]
        else:
            var = 7.5 / (numdists - 1)
            means = [[], [], []]
            for i in range(numdists):
                means[0].append(i * 7.5 / (numdists - 1) + 2.5)
                means[1].append(i * 7.5 * (-1)**i / (numdists - 1))
                means[2].append(-i * 7.5 / (numdists - 1) - 2.5)

            dists = []
            for i, m in enumerate(means):
                tmp = []
                for j in m:
                    tmp.append(NormalDistribution(j, var))
                mixture = GeneralMixtureModel(tmp)
                dists.append(mixture)

        # transition matrix
        A = [[0.34, 0.33, 0.33], [0.33, 0.34, 0.33], [0.33, 0.33, 0.34]]
        starts = np.ones(3) / 3

        hmm = HiddenMarkovModel.from_matrix(A,
                                            dists,
                                            starts,
                                            state_names=['0', '1', '2'],
                                            name='mixture{0}'.format(numdists))

        return hmm
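As in Example #2, the baked model can be used directly; seq is a hypothetical 1-D array of observations:

hmm = obj.oriHMMParams(numdists=3)  # 'obj' is the owning instance
hidden = hmm.predict(seq)           # per-position state indices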
Example #10
def fit_mixture_model(counts):
    ''' Code adapted from https://github.com/josephreplogle/guide_calling '''

    data = np.log2(counts + 1)

    reshaped_data = data.reshape(-1, 1)

    xs = np.linspace(-2, max(data) + 2, 1000)

    # Re-fit the model until it has converged with both components given non-zero weight
    # and the Poisson component in the first position with lower mean.

    while True:
        model = GeneralMixtureModel.from_samples(
            [PoissonDistribution, NormalDistribution], 2, reshaped_data)

        if 0 in model.weights:
            # One component was eliminated
            continue
        elif np.isnan(model.probability(xs)).any():
            continue
        elif model.distributions[0].parameters[0] > model.distributions[
                1].parameters[0]:
            continue
        elif model.distributions[0].name != 'PoissonDistribution':
            continue
        else:
            break

    labels = model.predict(reshaped_data)

    xs = np.linspace(0, max(data) + 2, 1000)
    p_second_component = model.predict_proba(xs.reshape(-1, 1))[:, 1]
    threshold = 2**xs[np.argmax(p_second_component >= 0.5)]

    return labels, threshold
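A usage sketch on synthetic data (the two populations are illustrative assumptions):

counts = np.concatenate([np.random.poisson(1, 500),
                         np.random.poisson(200, 500)])
labels, threshold = fit_mixture_model(counts)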
Example #11
def getMixtureModelCutOff(samples, alpha, mu, sigma):
    # assumes: from scipy.stats import expon, plus pomegranate's
    # GeneralMixtureModel, ExponentialDistribution, NormalDistribution
    mixture_m = GeneralMixtureModel(
        [ExponentialDistribution(alpha), NormalDistribution(mu, sigma)])
    model = mixture_m.fit(samples.reshape(-1, 1))
    pred_alpha = model.distributions[0].parameters[0]
    # 95th percentile of the fitted exponential component
    return expon.ppf(0.95, 0, 1 / pred_alpha)
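A self-contained usage sketch with synthetic samples:

import numpy as np
from scipy.stats import expon
from pomegranate import (GeneralMixtureModel, ExponentialDistribution,
                         NormalDistribution)

samples = np.concatenate([np.random.exponential(1.0, 800),
                          np.random.normal(5.0, 1.0, 200)])
cutoff = getMixtureModelCutOff(samples, alpha=1.0, mu=5.0, sigma=1.0)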
Example #12
    def _segment(self, arr, components=2):

        nonzero = arr[arr > 0]
        idx = self.hampel_filter(np.log2(nonzero))
        filtered = nonzero[idx]

        log_gmm = self.get_states(np.log2(filtered))
        log_means, log_probs = log_gmm.means_.ravel(), log_gmm.weights_
        ln_gmm = self.get_states(filtered) # to improve the sensitivity
        ln_means, ln_probs = ln_gmm.means_.ravel(), ln_gmm.weights_
        if (len(log_means) == 1):
            means, probs = ln_means, ln_probs
            scale = 'linear'
        else:
            means, probs = log_means, log_probs
            scale = 'log'

        logger.info('Estimated HMM state number: {0} ({1} scale)'.format(len(means), scale))
        model = HiddenMarkovModel()
        # GMM emissions
        dists = []
        for m in means:
            tmp = []
            for i in range(components):
                e = m + (-1)**i * ((i+1)//2) * 0.5
                s = 0.5
                tmp.append(NormalDistribution(e, s))
            mixture = State(GeneralMixtureModel(tmp), name=str(m))
            dists.append(mixture)
        model.add_states(*tuple(dists))
        # transition matrix
        for i in range(len(means)):
            for j in range(len(means)):
                if i==j:
                    model.add_transition(dists[i], dists[j], 0.8)
                else:
                    model.add_transition(dists[i], dists[j], 0.2/(len(means)-1))
        
        # starts and ends
        for i in range(len(means)):
            model.add_transition(model.start, dists[i], probs[i])
        
        model.bake()

        # training sequences
        tmp = np.zeros(nonzero.size)
        tmp[idx] = filtered
        newarr = np.zeros(arr.size)
        newarr[arr > 0] = tmp

        if len(means) > 1:
            model.fit(self.pieces(newarr, scale=scale), algorithm='baum-welch', n_jobs=self.n_jobs,
                    max_iterations=5000, stop_threshold=2e-4)
            
            queue = newarr[newarr > 0]
            
            if scale=='log':
                seq = np.r_[[s.name for i, s in model.viterbi(np.log2(queue))[1][1:]]]
            else:
                seq = np.r_[[s.name for i, s in model.viterbi(queue)[1][1:]]]
            seg = self.assign_cnv(queue, seq)
            
            predicted = np.zeros(newarr.size)
            predicted[newarr > 0] = seg
            seg = self.call_intervals(predicted)
        else:
            seg = [(0, newarr.size)]
        
        return newarr, seg, scale
Example #13
    def mixture(self, args):
        weights = args[0]
        distributions = args[1]
        return GeneralMixtureModel(distributions, weights=weights)
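A hypothetical call, building a two-component univariate mixture ('obj' owns the method):

from pomegranate import NormalDistribution
dists = [NormalDistribution(0, 1), NormalDistribution(5, 2)]
gmm = obj.mixture(([0.3, 0.7], dists))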
Example #14
    def __init__(self, dim, seed=None):
        # mixture hyperparameters
        K = 9
        theta0 = [0.5, 0.5]
        beta = np.ones(K)
        Psi = 0.1 * np.diag(np.ones(dim))
        nu = dim + 2.0

        # preserve the global RNG state while seeding locally
        rstate = np.random.get_state()
        np.random.seed(seed)

        unif_dist = UniformDistribution(0.0, 1.0)

        self.theta0 = theta0
        beta_dist = DirichletDistribution(beta)

        self.dim = Psi.shape[0]
        self.dists = []

        # same mixture weights for both distributions
        weights = beta_dist.sample()

        mus = []
        for i, _ in enumerate(theta0):
            mix = []
            for j, _ in enumerate(weights):
                # alternate covariance scales across components
                if j % 3 == 0:
                    Sigma = invwishart.rvs(df=nu, scale=Psi)
                elif j % 3 == 1:
                    Sigma = invwishart.rvs(df=nu, scale=0.01 * Psi)
                else:
                    Sigma = invwishart.rvs(df=nu, scale=0.0001 * Psi)

                if i == 0:
                    mu = unif_dist.sample(self.dim)
                    mus.append(mu)
                else:
                    mu = mus[j]

                mix.append(MultivariateGaussianDistribution(mu, Sigma))

            model = GeneralMixtureModel(mix, weights=weights)
            self.dists.append(model)

        for d in self.dists:
            print(d)

        self.rstate = np.random.get_state()
        np.random.set_state(rstate)
Example #15
from pomegranate import (
    NaiveBayes,
    NormalDistribution,
    UniformDistribution,
    ExponentialDistribution,
    GeneralMixtureModel,
    MultivariateGaussianDistribution,
    BernoulliDistribution,
)
import pandas as pd
import numpy as np

X = pd.DataFrame({"A": [1, 0, 1, 0, 1], "B": [1, 1, 1, 1, 0]})

x = BernoulliDistribution(0.4)

vals = [x.sample() for _ in range(1000)]

model = NaiveBayes([
    NormalDistribution(5, 2),
    UniformDistribution(0, 10),
    ExponentialDistribution(1.0)
])
model.predict(np.array([[10]]))

model = GeneralMixtureModel.from_samples(MultivariateGaussianDistribution,
                                         n_components=3,
                                         X=X)
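The fitted mixture can then be queried, e.g.:

labels = model.predict(X.values)            # hard component assignments
posteriors = model.predict_proba(X.values)  # per-component probabilities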
Example #16
# for now, no ratio in data (no rates A, B or C in this dataset)

only_flux = True
scale_flux = False

hdulist = pyfits.open('../iirc_data/all_data_for_ml.fits')
data = hdulist[1].data

X_flux, X, data_thr, data_fr_en, data_fr_err = get_iirc_data(
    data, only_flux=only_flux, scale_flux=scale_flux, thresholded=True)

# GMM with 3 components:
np.random.seed(0)

# note: passing a distribution class to the constructor is an older
# pomegranate API; recent 0.x releases use GeneralMixtureModel.from_samples
gmm = GeneralMixtureModel(MultivariateGaussianDistribution, n_components=3)
gmm.fit(X)
preds = gmm.predict(X)
probs = gmm.predict_proba(X)

data_thr['preds'] = pd.Series(preds).astype("category")

color_key = ["red", "yellow", "blue", "grey", "black", "purple", "pink",
             "brown", "green", "orange"]  # Spectral9
color_key = color_key[:len(set(preds))+1]

covs = np.array([np.array(gmm.distributions[m].parameters[1])
                 for m in range(len(gmm.distributions))])
means = np.array([np.array(gmm.distributions[m].parameters[0])
                  for m in range(len(gmm.distributions))])
Example #17
class discrete(object):
    def __init__(self, bin_ends, weights):
        """
        Binned function class for any discrete function

        Parameters
        ----------
        bin_ends: numpy.ndarray, float
            endpoints of bins
        weights: numpy.ndarray, float
            relative weights associated with each bin

        Notes
        -----
        TO DO: Rename to piecewise constant or somesuch
        """
        self.bin_ends = bin_ends
        self.dbins = self.bin_ends[1:] - self.bin_ends[:-1]
        self.n_bins = len(self.bin_ends)-1
        self.bin_range = range(self.n_bins)

        self.weights = weights
        self.normweights = np.cumsum(self.weights) / np.sum(self.weights)
        self.distweights = np.cumsum(self.weights) / np.dot(self.weights, self.dbins)

        self.funcs = [UD(self.bin_ends[i], self.bin_ends[i+1]) for i in self.bin_range]
        if self.n_bins > 1:
            self.dist = GMM(self.funcs, weights=self.weights)
        else:
            self.dist = self.funcs[0]

    def pdf(self, xs):
        return self.evaluate(xs)

    def evaluate_one(self, x):
        """
        Function to evaluate the discrete probability distribution at one point

        Parameters
        ----------
        x: float
            value at which to evaluate discrete probability distribution

        Returns
        -------
        p: float
            value of discrete probability distribution at x
        """
        p = self.dist.probability(x)
        return p

    def evaluate(self, xs):
        """
        Function to evaluate the discrete probability distribution at many points

        Parameters
        ----------
        xs: ndarray, float
            values at which to evaluate discrete probability distribution

        Returns
        -------
        ps: ndarray, float
            values of discrete probability distribution at xs
        """
        # ps = np.array([self.evaluate_one(x) for x in xs])
        ps = self.dist.probability(xs)
        return ps

    def sample_one(self):
        """
        Function to sample a single value from discrete probability distribution

        Returns
        -------
        x: float
            a single point sampled from the discrete probability distribution
        """
        # r = np.random.random()
        # k = bisect.bisect(self.normweights, r)
        #
        # x = np.random.uniform(low=self.bin_ends[k], high=self.bin_ends[k+1])
        x = self.dist.sample(1)
        return x

    def sample(self, n_samps):
        """
        Function to take samples from discrete probability distribution

        Parameters
        ----------
        n_samps: int
            number of samples to take

        Returns
        -------
        xs: ndarray, float
            array of points sampled from the discrete probability distribution
        """
        # xs = np.array([self.sample_one() for n in range(n_samps)])
        xs = np.array(self.dist.sample(n_samps))
        return xs
Example #18
    def _initDists(self, X, distribution=MultivariateGaussianDistribution):
        technique = "R_MV-GMM"  # mixture of multivariate Gaussian distributions
        if (technique == "GMM"):
            # gaussian mixture model
            #// uvgd = NormalDistribution.from_samples(X)
            #// gmm = GeneralMixtureModel([uvgd.copy() for _ in range(self.nmix)])
            gmm = GeneralMixtureModel.from_samples(
                distributions=[NormalDistribution for _ in range(self.nmix)],
                X=X)
            dists = [gmm.copy() for _ in range(self.statesNumber)]
        elif (technique == "MV-GMM"):
            # multivariate gaussian mixture model
            #// mvgd = MultivariateGaussianDistribution.from_samples(X)
            #// gmm = GeneralMixtureModel([mvgd.copy() for _ in range(self.nmix)])
            gmm = GeneralMixtureModel.from_samples(distributions=[
                MultivariateGaussianDistribution for _ in range(self.nmix)
            ],
                                                   X=X,
                                                   n_components=3)
            dists = [gmm.copy() for _ in range(self.statesNumber)]
        elif (technique == "MVG"):
            y = self._initkmeans(X=X, numClasses=self.statesNumber)
            dists = [
                MultivariateGaussianDistribution.from_samples(X=X[y == i])
                for i in range(self.statesNumber)
            ]
        elif (technique == "R_GMM"):
            # random gaussian mixture model
            randNormal = lambda: NormalDistribution(np.random.randint(1, 10), 1)
            randGMM = lambda: GeneralMixtureModel(
                [randNormal() for _ in range(self.nmix)])
            dists = [randGMM() for _ in range(self.statesNumber)]
        elif (technique == "R_MV-GMM"):
            # random multivariate gaussian mixture model
            randGMM = lambda: GeneralMixtureModel(
                [randMVG() for _ in range(self.nmix)])
            dists = [randGMM() for _ in range(self.statesNumber)]
        return dists

        #* not completed (unreachable after the return above):
        #! GMM-HMM-k
        y = self._initkmeans(X, self.statesNumber)
        # list(map(print, y))
        return [
            GeneralMixtureModel.from_samples(distribution,
                                             X=X[y == i],
                                             n_components=self.nmix)
            for i in range(self.statesNumber)
        ]

        #! Kmeans init
        if not isinstance(X, BaseGenerator):
            data_generator = SequenceGenerator(X, None, None)
        else:
            data_generator = X

        initialization_batch_size = len(data_generator)

        X_ = []
        data = data_generator.batches()
        for i in range(initialization_batch_size):
            batch = next(data)
            X_.extend(batch[0])

        X_concat = np.concatenate(X_)
        if X_concat.ndim == 1:
            X_concat = X_concat.reshape(X_concat.shape[0], 1)
        n, d = X_concat.shape
        clf = Kmeans(self.statesNumber, init="kmeans++",
                     n_init=1)  # init should be one of
        clf.fit(X_concat, max_iterations=None, batches_per_epoch=None)
        y = clf.predict(X_concat)
        if callable(distribution):
            if d == 1:
                dists = [
                    distribution.from_samples(X_concat[y == i][:, 0])
                    for i in range(self.statesNumber)
                ]
            elif distribution.blank().d > 1:
                dists = [
                    distribution.from_samples(X_concat[y == i])
                    for i in range(self.statesNumber)
                ]
            else:
                print("error")
        return dists
Example #19
class gmix(object):

    def __init__(self, amps, funcs, limits=(d.min_x, d.max_x)):
        """
        Object to define a mixture probability distribution

        Parameters
        ----------
        amps: ndarray, float
            array with one relative amplitude per component
        funcs: list, chippr.gauss or chippr.discrete objects
            list of components
        limits: tuple or list or numpy.ndarray, float, optional
            minimum and maximum sample values to return
        """

        self.amps = amps/np.sum(amps)
        self.cumamps = np.cumsum(self.amps)
        self.n_comps = len(self.amps)

        self.funcs = funcs
        # each component is expected to expose its underlying pomegranate
        # distribution via a `.dist` attribute (chippr.gauss / chippr.discrete)
        self.funcs = [func.dist for func in self.funcs]

        self.dims = np.shape(np.array(limits).T)[0]
        self.min_x = limits[0]
        self.max_x = limits[1]
        # print("amps="+str(self.amps))
        self.dist = GMM(self.funcs, weights=self.amps)

    def pdf(self, xs):
        return self.evaluate(xs)

    def evaluate_one(self, x):
        """
        Function to evaluate Gaussian mixture
        once

        Parameters
        ----------
        x: float
            value at which to evaluate Gaussian mixture

        Returns
        -------
        p: float
            probability associated with x
        """
        # p = 0.
        # for c in range(self.n_comps):
        #     p += self.amps[c] * self.funcs[c].evaluate_one(x)
        p = self.dist.probability(x)
        return p

    def evaluate(self, xs):
        """
        Function to evaluate the Gaussian mixture probability distribution at many points

        Parameters
        ----------
        xs: ndarray, float
            values at which to evaluate Gaussian mixture probability distribution

        Returns
        -------
        ps: ndarray, float
            values of Gaussian mixture probability distribution at xs
        """
        # ps = np.zeros(len(xs))
        # for c in range(self.n_comps):
        #     ps += self.amps[c] * self.funcs[c].evaluate(xs)
        ps = self.dist.probability(xs)
        return ps

    def sample_one(self):
        """
        Function to sample a single value from Gaussian mixture probability distribution

        Returns
        -------
        x: float
            a single point sampled from the Gaussian mixture probability distribution
        """

        # x = -1. * np.ones(self.dims)
        # #don't do this every time!
        # min_x = self.min_x * np.ones(self.dims)
        # max_x = self.max_x * np.ones(self.dims)
        #
        # while np.any(np.less(x, min_x)) or np.any(np.greater(x, max_x)):
        #     r = np.random.uniform(0., self.cumamps[-1])
        #     c = 0
        #     for k in range(1, self.n_comps):
        #         if r > self.cumamps[k-1]:
        #             c = k
        #     x = self.funcs[c].sample_one()
        x = self.dist.sample(1)
        return x

    def sample(self, n_samps):
        """
        Function to take samples from Gaussian mixture probability distribution

        Parameters
        ----------
        n_samps: int
            number of samples to take

        Returns
        -------
        xs: ndarray, float
            array of points sampled from the Gaussian mixture probability distribution
        """
        # xs = np.array([self.sample_one() for n in range(n_samps)])
        xs = np.array(self.dist.sample(n_samps))
        return xs
Example #20
def randMVGMM(n=40, nmix=5, dist=MultivariateGaussianDistribution):
    '''
    Generate a random Gaussian mixture model of multivariate distributions.
    Note: `dist` is unused; randMVG is assumed to be defined elsewhere.
    '''
    dists = [randMVG(n=n) for _ in range(nmix)]
    return GeneralMixtureModel(dists)
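Given that assumption about randMVG, a call might be:

gmm = randMVGMM(n=40, nmix=5)
draws = gmm.sample(10)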
Example #21
time_list = [100, 500, 900, 1500]
for time in time_list:
    samples = hiker_paths.get_all_at_time(time)

    weights = p_filter.weighting_func(log_weights)
    print(weights)
    print(samples)  # NormalDistribution

    samples = [[float(item[0]), float(item[1])] for item in samples]

    test = np.random.multivariate_normal([50, 50], [[1, 0], [0, 1]], 10)

    print(test)

    gmm = GeneralMixtureModel.from_samples(MultivariateGaussianDistribution,
                                           n_components=4,
                                           X=samples,
                                           weights=weights)

    clf = pomegranate_to_scikitlearn(gmm)  # assumed helper converting to a scikit-learn-style classifier

    graph_shape = depth_dict["data"].shape
    print(graph_shape)

    # display predicted scores by the model as a contour plot

    ax = plt.subplot(111)

    x = np.linspace(0.0, graph_shape[0])
    y = np.linspace(0.0, graph_shape[1])
    X, Y = np.meshgrid(x, y)
    XX = np.array([X.ravel(), Y.ravel()]).T
Example #22
# print("Naive Bayes - Semisupervised Learning Accuracy: {}".format((model_b.predict(x_val) == y_val).mean()))

model_c = BayesClassifier.from_samples(MultivariateGaussianDistribution,
                                       x_train,
                                       y_train,
                                       inertia=0.0,
                                       pseudocount=0.0,
                                       stop_threshold=0.1,
                                       max_iterations=100,
                                       verbose=True,
                                       n_jobs=1)
print("Bayes Classifier - Semisupervised Learning Accuracy: {}".format(
    (model_c.predict(x_val) == y_val).mean()))

# general mixture model
d0 = GeneralMixtureModel.from_samples(NormalDistribution, 2,
                                      x_train[y_train == 0])
d1 = GeneralMixtureModel.from_samples(NormalDistribution, 2,
                                      x_train[y_train == 1])
d2 = GeneralMixtureModel.from_samples(NormalDistribution, 2,
                                      x_train[y_train == 2])
d3 = GeneralMixtureModel.from_samples(NormalDistribution, 2,
                                      x_train[y_train == 3])
d4 = GeneralMixtureModel.from_samples(NormalDistribution, 2,
                                      x_train[y_train == 4])
d5 = GeneralMixtureModel.from_samples(NormalDistribution, 2,
                                      x_train[y_train == 5])
d6 = GeneralMixtureModel.from_samples(NormalDistribution, 2,
                                      x_train[y_train == 6])
d7 = GeneralMixtureModel.from_samples(NormalDistribution, 2,
                                      x_train[y_train == 7])
d8 = GeneralMixtureModel.from_samples(NormalDistribution, 2,
                                      x_train[y_train == 8])
Example #23
np.random.seed(0)

X = np.c_[data_thr.orbit, data_thr.rate, data_thr.rateA, data_thr.rateB,
          data_thr.rateC, data_thr.rateCA]
Html_file = open("gmm_pomegranate_files/gmm3w_pomegranate.html", "w")

scaler = StandardScaler()
X = scaler.fit_transform(X)
# 1 corresponds to data_thr.rate and 4=5-1 to data_thr.rateC
# w is assumed to be defined upstream (per-feature measurement errors)
w = w / np.sqrt(scaler.var_[1:])

# w = np.exp(-np.exp(3 * w.mean(axis=1)))
w = 1. / w.mean(axis=1) ** 2

gmm = GeneralMixtureModel(MultivariateGaussianDistribution, n_components=3)
gmm.fit(X, weights=w)
preds = gmm.predict(X)
probs = gmm.predict_proba(X)

data_thr['preds'] = pd.Series(preds).astype("category")

color_key = ["red", "blue", "yellow", "grey", "black", "purple", "pink",
             "brown", "green", "orange"]  # Spectral9
color_key = color_key[:len(set(preds))+1]

covs = np.array([np.array(gmm.distributions[m].parameters[1])
                 for m in range(len(gmm.distributions))])
means = np.array([np.array(gmm.distributions[m].parameters[0])
                  for m in range(len(gmm.distributions))])
Example #24
def generate_guide_rna_prediction(
        loom,
        guide_rnas,
        nguide_ca='nGuide',
        nguide_reads_ca='nGuideReads',
        cell_prediction_summary_ca='CellGuidePrediction',
        overwrite=False,
        only_generate_log2=False,
        ncell_threshold_for_guide=10,
        nguide_threshold_for_cell=10):
    """
    This approach is inspired by Replogle et al. 2018 (https://doi.org/10.1038/s41587-020-0470-y). However, instead of a Gaussian/Poisson mixture, this routine uses a Poisson/Poisson mixture.
    This routine uses the pomegranate package (https://github.com/jmschrei/pomegranate).

    Parameters
    ----------
    loom : LoomConnection
        A LoomConnection object upon which guide rna predictions will be made
        
    guide_rnas : iterable of strings
        a list or other iterable of strings, each corresponding to a column attribute of `loom` indicating the raw counts of a given guide RNA over cells
    nguide_ca : str
         QC metric, indicating the name of the column attribute to use to indicate the number of predicted guide RNAs for a cell (Default value = 'nGuide')
    nguide_reads_ca : str
         QC metric, indicating the name of the column attribute to use to indicate the total number of guide RNA reads for a cell (Default value = 'nGuideReads')
    cell_prediction_summary_ca : str
         Indicates the name of the column attribute to use to indicate a summary of positively-predicted guide RNAs for a cell (Default value = 'CellGuidePrediction')
    overwrite : bool
         If False, will raise exception if requested column attributes have already been written.  If True, will overwrite existing column attributes. (Default value = False)
    only_generate_log2 : bool
         If True, will generate log2 guide RNA counts, but will not apply any mixture model prediction. (Default value = False)
    ncell_threshold_for_guide : int
         Threshold for the number of cells wherein guide should have nonzero counts for mixture model to attempt prediction. (Default value = 10)
    nguide_threshold_for_cell : int
         Threshold for the number of guides to be detected in a given cell to attempt to make a prediction for that particular cell. (Default value = 10)

    Returns
    -------
    None. Results are written to column attributes of `loom`.

    """
    from panopticon.utilities import import_check
    exit_code = import_check("pomegranate",
                             'conda install -c anaconda pomegranate')
    if exit_code != 0:
        return
    import pandas as pd

    if nguide_reads_ca in loom.ca.keys() and overwrite == False:
        raise Exception(
            "{} already in loom.ca.keys(); if intended, set overwrite argument to True"
            .format(nguide_reads_ca))

    guide_rna_dfs = []
    for guide_rna in guide_rnas:
        guide_rna_dfs.append(
            pd.DataFrame(loom.ca[guide_rna], columns=[guide_rna], copy=True))
    guide_rna_dfs = pd.concat(guide_rna_dfs, axis=1)
    loom.ca[nguide_reads_ca] = guide_rna_dfs.sum(axis=1).values
    threshold_for_cell_mask = loom.ca[
        nguide_reads_ca] >= nguide_threshold_for_cell
    prediction_ca_names = []
    for guide_rna in guide_rnas:
        if guide_rna not in loom.ca.keys():
            raise Exception(
                "raw_antibody_count_df must be prepared such that columns match column attributes in loom corresponding to raw antibody conjugate counts"
            )

        new_ca_name = guide_rna + '_log2'
        if new_ca_name in loom.ca.keys() and overwrite == False:
            raise Exception(
                "{} already in loom.ca.keys(); rename guide column attribute and re-run, or set overwrite argument to True"
                .format(new_ca_name))

        loom.ca[new_ca_name] = np.log2(loom.ca[guide_rna])
        if not only_generate_log2:
            from pomegranate import GeneralMixtureModel, PoissonDistribution

            prediction_ca_name = guide_rna + '_prediction'
            prediction_ca_names.append(prediction_ca_name)
            if prediction_ca_name in loom.ca.keys() and overwrite == False:
                raise Exception(
                    "{} already in loom.ca.keys(); rename guide rna column attribute and re-run, or set overwrite argument to True"
                    .format(prediction_ca_name))
            if (~np.isfinite(loom.ca[new_ca_name])).sum() > 0:
                cellmask = np.isfinite(loom.ca[new_ca_name])
                if cellmask.sum(
                ) >= ncell_threshold_for_guide:  # have minimum cells for guide
                    model = GeneralMixtureModel.from_samples(
                        [PoissonDistribution, PoissonDistribution],
                        n_components=2,
                        X=loom.ca[new_ca_name][cellmask.nonzero()[0]].reshape(
                            -1, 1))
                    predictions = []
                    for val in loom.ca[new_ca_name]:
                        if not np.isfinite(val):
                            predictions.append(np.nan)
                        else:
                            predictions.append(
                                model.predict(np.array(val).reshape(-1, 1))[0])
                else:
                    predictions = [0] * loom.shape[1]
                predictions = np.array(predictions)

            else:
                model = GeneralMixtureModel.from_samples(
                    [PoissonDistribution, PoissonDistribution],
                    n_components=2,
                    X=loom.ca[new_ca_name].reshape(-1, 1))
                predictions = model.predict(loom.ca[new_ca_name].reshape(
                    -1, 1))

            if loom.ca[new_ca_name][np.array(predictions) == 0].mean(
            ) > loom.ca[new_ca_name][np.array(predictions) == 1].mean():
                predictions = 1 - predictions
            predictions = np.array(predictions)
            predictions = np.nan_to_num(predictions, nan=0.0)
            predictions *= threshold_for_cell_mask
            loom.ca[prediction_ca_name] = predictions

    guide_prediction_dfs = []
    for prediction_ca_name in prediction_ca_names:
        guide_prediction_dfs.append(
            pd.DataFrame(loom.ca[prediction_ca_name],
                         columns=[prediction_ca_name],
                         copy=True))
    guide_prediction_dfs = pd.concat(guide_prediction_dfs, axis=1)
    loom.ca[nguide_ca] = guide_prediction_dfs.sum(axis=1).values

    loom.ca[cell_prediction_summary_ca] = guide_prediction_dfs.apply(
        lambda x: '+'.join(guide_prediction_dfs.columns[np.where(x == 1)[0]]),
        axis=1).values
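A hypothetical invocation, assuming loom is an open LoomConnection whose column attributes include raw counts for each guide:

generate_guide_rna_prediction(
    loom,
    guide_rnas=['sgRNA_A', 'sgRNA_B'],  # hypothetical column attribute names
    overwrite=True)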