Ejemplo n.º 1
0
    def make_network(self, algo = 'crude', **kwargs):
        '''
        Build an OTUnetwork from a correlation matrix computed from self.

        Inputs:
            algo = [str] how the correlation matrix is obtained:
                'crude'                               : pearson correlation of log10 of self.to_Surveymat(**kwargs).
                'sparse', 'csi', 'clr'                : second value returned by self.basis_corr(method = algo, **kwargs).
                'basis'                               : pearson correlation of log(self.basis).
                'pearsonr', 'kendalltau', 'spearmanr' : that correlation of self.to_Surveymat().
        Optional inputs:
            row_labels = [list] labels for the second dimension of self. Default to range(k).
            col_labels = [list] labels for the first dimension of self. Default to range(n).
            All keyword arguments are also forwarded to self.to_Surveymat and self.basis_corr.
        Return the OTUnetwork built with node ids taken from the row labels of the correlation matrix.
        Raises ValueError if algo is not one of the values listed above
        (this includes 'MI', which could never run: it used values that no branch computes).
        '''
        supported = ('crude', 'sparse', 'csi', 'clr', 'basis',
                     'pearsonr', 'kendalltau', 'spearmanr')
        if algo not in supported:
            # Fail early with a clear message instead of an UnboundLocalError further down.
            raise ValueError("Unsupported algo %r; expected one of: %s" % (algo, ', '.join(supported)))

        import SurveyStructures.OTUnetwork as ONet
        n, k       = self.shape
        row_labels = kwargs.get('row_labels', range(k))
        col_labels = kwargs.get('col_labels', range(n))

        # NOTE(review): spelled 'to_Surveymat' here; confirm this matches the method name defined on the class.
        md = self.to_Surveymat(**kwargs)
        if algo == 'crude':
            mat, pval = (md.log10()).correlation(type = 'pearsonr')
        elif algo in ('sparse', 'csi', 'clr'):
            _, c_sparse, _ = self.basis_corr(method = algo, **kwargs)
            mat = MatrixDictionary()
            mat.from_matrix(c_sparse, row_labels, row_labels)
            # basis_corr yields no p-values here; the correlation matrix itself is passed as p_vals.
            pval = mat
        elif algo == 'basis':
            md = Survey_matrix()
            md.from_matrix(np.log(self.basis).transpose(), row_labels, col_labels)
            mat, pval = md.correlation(type = 'pearsonr')
        else:
            # 'pearsonr', 'kendalltau', 'spearmanr': labels passed in kwargs are not forwarded here.
            abunds = self.to_Surveymat()
            mat, pval = abunds.correlation(type = algo)

        ## make network
        return ONet.OTUnetwork(node_ids = mat.row_labels(), matrix = mat, p_vals = {'direct':pval}, algo = algo, lag = 0)
Ejemplo n.º 2
0
def dirichlet_multi(a, n, N, **kwargs):
    '''
    Each sample is a sample from a multinomial with probabilities drawn from a dirichlet.
    Inputs:
        a = [array] alpha parameters of dirichlet. length determines the number of otus.
        n = [int] number of samples. Coerced with int().
        N = [int/array] number of reads to take for each sample.
            If an integer (python or numpy) take same number of reads from each sample.
            If list, length must be = n.
    Optional inputs:
        otu_labels    = [list] row labels for Survey_matrix. Default to ['otu_1','otu_2',...,'otu_k'].
        sample_labels = [list] col labels for Survey_matrix. Default to ['sample_1','sample_2',...,'sample_n'].
    Return Survey_matrix of simulated data.
    '''
    import numbers

    k = len(a)              # number of otus
    n = int(n)              # coerce once: n is used as an array dimension and a loop bound below

    # Default labels are built as real lists (map() would be a one-shot iterator on python 3).
    if 'otu_labels' in kwargs:
        otu_labels = kwargs['otu_labels']
    else:
        otu_labels = ['otu_' + str(i) for i in range(1, 1 + k)]
    if 'sample_labels' in kwargs:
        sample_labels = kwargs['sample_labels']
    else:
        sample_labels = ['sample_' + str(i) for i in range(1, 1 + n)]

    # numbers.Integral also matches long and numpy integer scalars, which type(N) == int missed.
    if isinstance(N, numbers.Integral):
        N = [N] * n

    probs = dirichlet(a, n) # each row gives the probabilities of different otus in 1 sample.
    mat   = np.zeros((k, n))
    for i in range(n):
        mat[:, i] = multinomial(N[i], probs[i, :])
    sim_data = Survey_matrix()
    sim_data.from_matrix(mat, row_labels = otu_labels, col_labels = sample_labels)
    return sim_data
Ejemplo n.º 3
0
def dirichlet_multi(a, n, N, **kwargs):
    '''
    Each sample is a sample from a multinomial with probabilities drawn from a dirichlet.
    Inputs:
        a = [array] alpha parameters of dirichlet. length determines the number of otus.
        n = [int] number of samples. Coerced with int().
        N = [int/array] number of reads to take for each sample.
            If an integer (python or numpy) take same number of reads from each sample.
            If list, length must be = n.
    Optional inputs:
        otu_labels    = [list] row labels for Survey_matrix. Default to ['otu_1','otu_2',...,'otu_k'].
        sample_labels = [list] col labels for Survey_matrix. Default to ['sample_1','sample_2',...,'sample_n'].
    Return Survey_matrix of simulated data.
    '''
    import numbers

    k = len(a)  # number of otus
    n = int(n)  # coerce once: n is an array dimension and a loop bound below

    # Default labels are real lists; map() would be a one-shot iterator on
    # python 3.
    if 'otu_labels' in kwargs:
        otu_labels = kwargs['otu_labels']
    else:
        otu_labels = ['otu_' + str(i) for i in range(1, 1 + k)]
    if 'sample_labels' in kwargs:
        sample_labels = kwargs['sample_labels']
    else:
        sample_labels = ['sample_' + str(i) for i in range(1, 1 + n)]

    # numbers.Integral also matches long and numpy integer scalars, which
    # the former type(N) == int comparison missed.
    if isinstance(N, numbers.Integral):
        N = [N] * n

    # each row gives the probabilities of different otus in 1 sample.
    probs = dirichlet(a, n)
    mat = np.zeros((k, n))
    for i in range(n):
        mat[:, i] = multinomial(N[i], probs[i, :])
    sim_data = Survey_matrix()
    sim_data.from_matrix(mat, row_labels=otu_labels, col_labels=sample_labels)
    return sim_data
Ejemplo n.º 4
0
def independent_kde(site, original):
    '''
    Simulate a dataset in which every OTU is drawn independently from its own stored distribution.

    Inputs:
        site     = [str] prefix of the pickle file '../data/otu_dist/<site>_filtered_kde.pick'.
                   The path is relative to the current working directory.
        original = object whose to_matrix() returns (counts, otus, samples).
                   Only the otu and sample labels are used.
    The pickle must hold a (kdes, cdfs) pair; cdfs is indexed by otu label.
    For every otu and sample, a fraction p is picked as the first point of a 100-point grid over [0, 1]
    whose cdf value exceeds a uniform random number, and the count is a binomial draw with 1000 trials
    and success probability p.
    Return Survey_matrix of simulated data carrying the labels of original.
    '''
    from numpy.random import rand
    _, otus, samples = original.to_matrix()
    k = len(otus)
    n = len(samples)
    # Fixed depth of 1000 reads per sample; the observed per-sample totals are not used.
    N = [1000] * n
    ## load kde of each OTU
    kde_path = '../data/otu_dist/' + site + '_filtered_kde.pick'
    # Pickles are binary data, so open in 'rb'; the with-block closes the file even if loading fails.
    # NOTE: pickle.load can execute arbitrary code; only point this at trusted files.
    with open(kde_path, 'rb') as f:
        kdes, cdfs = pickle.load(f)
    ## simulate each OTU
    x = np.linspace(0, 1, 100)
    simulated = np.zeros((k, n))
    for i, otu in enumerate(otus):
        cdf = cdfs[otu]  # assumes an array aligned with the grid x -- TODO confirm
        for j in range(n):
            # Inverse-cdf lookup: first grid point whose cdf value exceeds the random draw.
            p = x[cdf > rand()][0]
            simulated[i, j] = binomial(N[j], p)
    ## convert to Survey_matrix
    sim = Survey_matrix()
    sim.from_matrix(simulated, otus, samples)
    return sim
Ejemplo n.º 5
0
 def to_SurveyMat(self, **kwargs):
     '''
     Return a new Survey_matrix filled with the transpose of self.

     Optional inputs:
         row_labels = labels for the rows of the result. Default to range(k), k = self.shape[1].
         col_labels = labels for the cols of the result. Default to range(n), n = self.shape[0].
     '''
     survey = Survey_matrix()
     n_rows, n_cols = self.shape
     # The transpose swaps the axes, so default row labels span the original columns.
     default_rows = range(n_cols)
     rows = kwargs.get('row_labels', default_rows)
     default_cols = range(n_rows)
     cols = kwargs.get('col_labels', default_cols)
     survey.from_matrix(self.transpose(), rows, cols)
     return survey
Ejemplo n.º 6
0
def independent_kde(site, original):
    '''
    Simulate a dataset in which every OTU is drawn independently from its own stored distribution.

    Inputs:
        site     = [str] prefix of the pickle file '../data/otu_dist/<site>_filtered_kde.pick'.
                   The path is relative to the current working directory.
        original = object whose to_matrix() returns (counts, otus, samples).
                   Only the otu and sample labels are used.
    The pickle must hold a (kdes, cdfs) pair; cdfs is indexed by otu label.
    For every otu and sample, a fraction p is picked as the first point of a 100-point grid over [0, 1]
    whose cdf value exceeds a uniform random number, and the count is a binomial draw with 1000 trials
    and success probability p.
    Return Survey_matrix of simulated data carrying the labels of original.
    '''
    from numpy.random import rand
    _, otus, samples = original.to_matrix()
    k = len(otus)
    n = len(samples)
    # Fixed depth of 1000 reads per sample; the observed per-sample totals are not used.
    N = [1000] * n
    ## load kde of each OTU
    kde_path = '../data/otu_dist/' + site + '_filtered_kde.pick'
    # Pickles are binary data, so open in 'rb'; the with-block closes the file even if loading fails.
    # NOTE: pickle.load can execute arbitrary code; only point this at trusted files.
    with open(kde_path, 'rb') as f:
        kdes, cdfs  = pickle.load(f)
    ## simulate each OTU
    x = np.linspace(0,1,100)
    simulated = np.zeros((k,n))
    for i,otu in enumerate(otus):
        cdf = cdfs[otu]  # assumes an array aligned with the grid x -- TODO confirm
        for j in range(n):
            # Inverse-cdf lookup: first grid point whose cdf value exceeds the random draw.
            p = x[cdf>rand()][0]
            simulated[i,j] = binomial(N[j],p)
    ## convert to Survey_matrix
    sim = Survey_matrix()
    sim.from_matrix(simulated, otus, samples)
    return sim
Ejemplo n.º 7
0
 def to_SurveyMat(self, **kwargs):
     '''
     Return a new Survey_matrix filled with the transpose of self.

     Optional inputs:
         row_labels = labels for the rows of the result. Default to range(k), k = self.shape[1].
         col_labels = labels for the cols of the result. Default to range(n), n = self.shape[0].
     '''
     result       = Survey_matrix()
     dims         = self.shape
     n, k         = dims
     # The transpose swaps the axes, so default row labels span the original columns.
     fallback_row = range(k)
     labels_row   = kwargs.get('row_labels', fallback_row)
     fallback_col = range(n)
     labels_col   = kwargs.get('col_labels', fallback_col)
     flipped      = self.transpose()
     result.from_matrix(flipped, labels_row, labels_col)
     return result
Ejemplo n.º 8
0
def test_simulate():
    '''
    Smoke test for simulate: build a 2x2 Survey_matrix, call simulate on it with
    dirichlet_multi and num_sim = 10, and print whatever simulate returns.
    Nothing is asserted; the output is meant for visual inspection.
    '''
    mat = Survey_matrix()
    x = np.array([[20,20],[20,40]])
    r_lab = ['r0','r1']
    c_lab = ['c0','c1']
    mat.from_matrix(x,r_lab,c_lab)
    sims = simulate(mat, dirichlet_multi, num_sim = 10)
    # Single-argument print(...) gives the same output on python 2 and python 3.
    print(sims)
Ejemplo n.º 9
0
def test_simulate():
    '''
    Smoke test for simulate: build a 2x2 Survey_matrix, call simulate on it with
    dirichlet_multi and num_sim=10, and print whatever simulate returns.
    Nothing is asserted; the output is meant for visual inspection.
    '''
    mat = Survey_matrix()
    x = np.array([[20, 20], [20, 40]])
    r_lab = ['r0', 'r1']
    c_lab = ['c0', 'c1']
    mat.from_matrix(x, r_lab, c_lab)
    sims = simulate(mat, dirichlet_multi, num_sim=10)
    # Single-argument print(...) gives the same output on python 2 and 3.
    print(sims)
Ejemplo n.º 10
0
def test_permute():
    '''
    Smoke test for permute_w_replacement: print a 2x5 count matrix followed by
    the to_matrix() output of the permuted Survey_matrix built from it.
    Nothing is asserted; the output is meant for visual comparison.
    '''
    mat = Survey_matrix()
    x = np.array([[1,2,3,4,5],[6,7,8,9,10]])
    r_lab = ['r0','r1']
    c_lab = ['c0','c1','c2','c3','c4']
    mat.from_matrix(x,r_lab,c_lab)
    sims = permute_w_replacement(mat)
    # Single-argument print(...) gives the same output on python 2 and python 3.
    print(x)
    print(sims.to_matrix())
Ejemplo n.º 11
0
def test_permute():
    '''
    Smoke test for permute_w_replacement: print a 2x5 count matrix followed by
    the to_matrix() output of the permuted Survey_matrix built from it.
    Nothing is asserted; the output is meant for visual comparison.
    '''
    mat = Survey_matrix()
    x = np.array([[1, 2, 3, 4, 5], [6, 7, 8, 9, 10]])
    r_lab = ['r0', 'r1']
    c_lab = ['c0', 'c1', 'c2', 'c3', 'c4']
    mat.from_matrix(x, r_lab, c_lab)
    sims = permute_w_replacement(mat)
    # Single-argument print(...) gives the same output on python 2 and 3.
    print(x)
    print(sims.to_matrix())
Ejemplo n.º 12
0
def permute_w_replacement(original):
    '''
    Create a simulated dataset by resampling each row of the count matrix.
    Every row (the counts of one otu) is rebuilt by drawing, with replacement,
    as many values as there are columns from that same row.
    Return Survey_matrix of the resampled counts with the labels of original.
    '''
    from numpy.random import randint
    counts, otus, samples = original.to_matrix()
    num_rows, num_cols = counts.shape
    resampled = np.zeros((num_rows, num_cols))
    for r in range(num_rows):
        # Random column positions, repeats allowed.
        picks = randint(0, num_cols, (1, num_cols))
        resampled[r] = counts[r, picks]
    sim = Survey_matrix()
    sim.from_matrix(resampled, otus, samples)
    return sim
Ejemplo n.º 13
0
def permute_w_replacement(original):
    '''
    Create a simulated dataset by resampling each row of the count matrix.
    Every row (the counts of one otu) is rebuilt by drawing, with replacement,
    as many values as there are columns from that same row.
    Return Survey_matrix of the resampled counts with the labels of original.
    '''
    from numpy.random import randint
    counts, otus, samples = original.to_matrix()
    row_total, col_total = counts.shape
    shuffled = np.zeros((row_total, col_total))
    row = 0
    while row < row_total:
        # Random column positions, repeats allowed.
        chosen = randint(0, col_total, (1, col_total))
        shuffled[row] = counts[row, chosen]
        row += 1
    sim = Survey_matrix()
    sim.from_matrix(shuffled, otus, samples)
    return sim
Ejemplo n.º 14
0
    def make_network(self, algo='crude', **kwargs):
        '''
        Build an OTUnetwork from a correlation matrix computed from self.

        Inputs:
            algo = [str] how the correlation matrix is obtained:
                'crude'                : pearson correlation of log10 of
                                         self.to_Surveymat(**kwargs).
                'sparse', 'csi', 'clr' : second value returned by
                                         self.basis_corr(method=algo, **kwargs).
                'basis'                : pearson correlation of log(self.basis).
                'pearsonr', 'kendalltau', 'spearmanr' :
                                         that correlation of self.to_Surveymat().
        Optional inputs:
            row_labels = [list] labels for the second dimension of self.
                         Default to range(k).
            col_labels = [list] labels for the first dimension of self.
                         Default to range(n).
            All keyword arguments are also forwarded to self.to_Surveymat and
            self.basis_corr.
        Return the OTUnetwork built with node ids taken from the row labels of
        the correlation matrix.
        Raises ValueError if algo is not one of the values listed above (this
        includes 'MI', which could never run: it used values that no branch
        computes).
        '''
        supported = ('crude', 'sparse', 'csi', 'clr', 'basis', 'pearsonr',
                     'kendalltau', 'spearmanr')
        if algo not in supported:
            # Fail early with a clear message instead of an UnboundLocalError
            # further down.
            raise ValueError("Unsupported algo %r; expected one of: %s" %
                             (algo, ', '.join(supported)))

        import SurveyStructures.OTUnetwork as ONet
        n, k = self.shape
        row_labels = kwargs.get('row_labels', range(k))
        col_labels = kwargs.get('col_labels', range(n))

        # NOTE(review): spelled 'to_Surveymat' here; confirm this matches the
        # method name defined on the class.
        md = self.to_Surveymat(**kwargs)
        if algo == 'crude':
            mat, pval = (md.log10()).correlation(type='pearsonr')
        elif algo in ('sparse', 'csi', 'clr'):
            _, c_sparse, _ = self.basis_corr(method=algo, **kwargs)
            mat = MatrixDictionary()
            mat.from_matrix(c_sparse, row_labels, row_labels)
            # basis_corr yields no p-values here; the correlation matrix
            # itself is passed as p_vals.
            pval = mat
        elif algo == 'basis':
            md = Survey_matrix()
            md.from_matrix(np.log(self.basis).transpose(), row_labels,
                           col_labels)
            mat, pval = md.correlation(type='pearsonr')
        else:
            # 'pearsonr', 'kendalltau', 'spearmanr': labels passed in kwargs
            # are not forwarded here.
            abunds = self.to_Surveymat()
            mat, pval = abunds.correlation(type=algo)

        ## make network
        return ONet.OTUnetwork(node_ids=mat.row_labels(),
                               matrix=mat,
                               p_vals={'direct': pval},
                               algo=algo,
                               lag=0)
Ejemplo n.º 15
0
def test_independent_betabinom():
    '''
    Smoke test for independent_betabinom.
    Builds a 5x50 matrix of random whole-number values in [0, 100) whose second
    row is 100 minus the first row, then prints the pearson correlation and
    p-value matrices of: the data, the simulated data, and the simulated data
    after normalize(rows=False).
    Nothing is asserted; the output is meant for visual comparison.
    '''
    n = 5
    m = 50
    mat = np.floor(100 * np.random.rand(n, m))
    # Make rows 0 and 1 perfectly anti-correlated.
    mat[1, :] = 100 - mat[0, :]
    row_labels = range(n)
    col_labels = range(m)
    data = Survey_matrix()
    data.from_matrix(mat, row_labels, col_labels)
    # Single-argument print(...) gives the same output on python 2 and 3.
    c, p = data.correlation(type='pearsonr')
    print(c.to_matrix())
    print(p.to_matrix())
    sim = independent_betabinom(data)
    c_sim, p = sim.correlation(type='pearsonr')
    print(c_sim.to_matrix())
    print(p.to_matrix())
    sim_norm = sim.normalize(rows=False)
    c_sim_norm, p = sim_norm.correlation(type='pearsonr')
    print(c_sim_norm.to_matrix())
    print(p.to_matrix())
Ejemplo n.º 16
0
def test_independent_betabinom():
    '''
    Smoke test for independent_betabinom.
    Builds a 5x50 matrix of random whole-number values in [0, 100) whose second row is 100 minus the first row,
    then prints the pearson correlation and p-value matrices of: the data, the simulated data,
    and the simulated data after normalize(rows = False).
    Nothing is asserted; the output is meant for visual comparison.
    '''
    n = 5
    m = 50
    mat = np.floor(100*np.random.rand(n,m))
    # Make rows 0 and 1 perfectly anti-correlated.
    mat[1,:] = 100-mat[0,:]
    row_labels = range(n)
    col_labels = range(m)
    data = Survey_matrix()
    data.from_matrix(mat, row_labels, col_labels)
    # Single-argument print(...) gives the same output on python 2 and python 3.
    c,p = data.correlation(type = 'pearsonr')
    print(c.to_matrix())
    print(p.to_matrix())
    sim = independent_betabinom(data)
    c_sim, p = sim.correlation(type = 'pearsonr')
    print(c_sim.to_matrix())
    print(p.to_matrix())
    sim_norm = sim.normalize(rows = False)
    c_sim_norm, p = sim_norm.correlation(type = 'pearsonr')
    print(c_sim_norm.to_matrix())
    print(p.to_matrix())
Ejemplo n.º 17
0
def independent_betabinom(original):
    '''
    Simulate a dataset in which every OTU is drawn independently from a distribution fitted to that OTU alone.

    For each otu a two-column table [count of the otu, column total - count of the otu] is built over all samples
    and passed to estimate.dirmulti(symmetric = False) to obtain dirichlet parameters.
    For each otu and sample, a probability pair is then drawn from a dirichlet with those parameters, and the
    simulated count is the first component of a multinomial draw with that pair.
    All samples use the same number of reads: the mean of original.total_counts(rows = False), truncated to an integer.
    Return Survey_matrix of simulated data carrying the labels of original.
    '''
    counts, otus, samples = original.to_matrix()
    k = len(otus)
    n = len(samples)
    # list(): on python 3 .values() is a view object that np.mean cannot average.
    N = list(original.total_counts(rows = False).values())
    # multinomial needs an integer number of trials, so the mean depth is truncated.
    N_avg = int(np.mean(N))
    ## estimate beta-dist params for each OTU
    a = []
    tot = counts.sum(axis = 0)
    for i in range(k):
        c = np.c_[counts[i,:], tot - counts[i,:]]
        a_opt, ll = estimate.dirmulti(c, symmetric = False)
        a.append(a_opt)
    ## simulate each OTU
    simulated = np.zeros((k,n))
    for i in range(k):
        probs = dirichlet(a[i], n)
        for j, p in enumerate(probs):
            # Keep only the first component: the reads assigned to this otu.
            simulated[i,j] = multinomial(N_avg, p)[0]
    ## convert to Survey_matrix
    sim = Survey_matrix()
    sim.from_matrix(simulated, otus, samples)
    return sim
Ejemplo n.º 18
0
def independent_betabinom(original):
    '''
    Simulate a dataset in which every OTU is drawn independently from a
    distribution fitted to that OTU alone.

    For each otu a two-column table [count of the otu, column total - count of
    the otu] is built over all samples and passed to
    estimate.dirmulti(symmetric=False) to obtain dirichlet parameters.
    For each otu and sample, a probability pair is then drawn from a dirichlet
    with those parameters, and the simulated count is the first component of a
    multinomial draw with that pair.
    All samples use the same number of reads: the mean of
    original.total_counts(rows=False), truncated to an integer.
    Return Survey_matrix of simulated data carrying the labels of original.
    '''
    counts, otus, samples = original.to_matrix()
    k = len(otus)
    n = len(samples)
    # list(): on python 3 .values() is a view object that np.mean cannot
    # average.
    N = list(original.total_counts(rows=False).values())
    # multinomial needs an integer number of trials, so the mean depth is
    # truncated.
    N_avg = int(np.mean(N))
    ## estimate beta-dist params for each OTU
    a = []
    tot = counts.sum(axis=0)
    for i in range(k):
        c = np.c_[counts[i, :], tot - counts[i, :]]
        a_opt, ll = estimate.dirmulti(c, symmetric=False)
        a.append(a_opt)
    ## simulate each OTU
    simulated = np.zeros((k, n))
    for i in range(k):
        probs = dirichlet(a[i], n)
        for j, p in enumerate(probs):
            # Keep only the first component: the reads assigned to this otu.
            simulated[i, j] = multinomial(N_avg, p)[0]
    ## convert to Survey_matrix
    sim = Survey_matrix()
    sim.from_matrix(simulated, otus, samples)
    return sim