Ejemplo n.º 1
0
def make_bootstraps(counts, nperm, perm_template, outpath='./', iprint=0):
    '''
    Make nperm simulated datasets used to get pseudo p-values.
    Each simulated dataset is produced by permute_w_replacement(counts),
    i.e. each OTU in each sample is assigned an abundance that is randomly
    drawn (w. replacement) from the abundances of the OTU in all samples.
    Simulated datasets are written out as txt files.

    Parameters
    ----------
    counts : DataFrame
        The data to resample. Passed as-is to permute_w_replacement.
    nperm : int
        Number of permutations to produce.
        If nperm <= 0 no files are written.
    perm_template : str
        Template for the permuted data file names.
        Should not include the path, which is specified using the
        outpath parameter.
        The iteration number is indicated with a "#".
        For example: 'counts.permuted_#.txt'
    outpath : str (default './')
        The path to which permuted data will be written.
        Created if it does not exist. A trailing separator is optional.
        If empty or not provided files will be written to the cwd.
    iprint : int (default = 0)
        The interval at which iteration number is printed out.
        If iprint<=0 no printouts are made.
    '''
    # An empty outpath means "current directory"; os.makedirs('') would raise.
    if outpath and not os.path.exists(outpath):
        os.makedirs(outpath)
    for i in range(nperm):
        if iprint > 0 and i % iprint == 0:
            print(i)
        counts_perm = permute_w_replacement(counts)
        ## write out the permuted data
        # os.path.join only inserts a separator when outpath lacks one, so
        # 'out' and 'out/' both place the file inside the directory.
        outfile = os.path.join(outpath, perm_template.replace('#', '%d' % i))
        write_txt(counts_perm, outfile)
Ejemplo n.º 2
0
def main(cor_file, perm_template, nperm, test_type='two_sided', outfile=None):
    '''
    Compute pseudo p-values for a set of correlations and write them out.

    Pseudo p-vals are the percentage of times a correlation at least
    as extreme as the "real" one was observed in simulated datasets.
    The computation itself is delegated to get_pvalues.

    Files containing the permuted correlations should be named with a
    consistent template, and these file names cannot contain any "#"
    characters.

    Parameters
    ----------
    cor_file : str
        File holding the "real" correlations; loaded with read_txt.
    perm_template : str
        Template for the permuted correlation file names.
        Forwarded to get_pvalues.
    nperm : int
        Number of permutations. Forwarded to get_pvalues and used in the
        default output file name.
    test_type : str (default 'two_sided')
        Forwarded to get_pvalues.
    outfile : str (default None)
        File to which the p-values are written.
        If None, cor_file + '.nperm_<nperm>.pvals' is used.
    '''
    observed = read_txt(cor_file)
    pseudo_pvals = get_pvalues(observed, perm_template, nperm, test_type)

    # Fall back to a name derived from the input file and permutation count.
    if outfile is not None:
        target = outfile
    else:
        suffix = '.nperm_%d.pvals' % nperm
        target = cor_file + suffix

    write_txt(pseudo_pvals, target)
        help="Correlation strength exclusion threshold (0.1 default).")
    (options, args) = parser.parse_args()
    counts_file = args[0]

    from analysis_methods import basis_corr
    from io_methods import read_txt, write_txt

    kwargs = options.__dict__
    algo = kwargs.pop('algo')
    cor_file = kwargs.pop('cor_file')
    cov_file = kwargs.pop('cov_file')
    if cor_file is None: cor_file = 'cor_mat_' + algo + '.out'
    if cov_file is None: cov_file = 'cov_mat_' + algo + '.out'

    print 'reading data'
    counts = read_txt(counts_file)

    ## Calculate correlations between components using SparCC
    print 'computing correlations'
    cor, cov = basis_corr(counts, method=algo, **kwargs)

    ## write out results
    print 'writing results'
    write_txt(cor, cor_file)
    print 'wrote ' + cor_file
    if cov is not None:
        write_txt(cov, cov_file)
        print 'wrote ' + cov_file

    print 'Done!'
Ejemplo n.º 4
0
def driver(argv=None):
    '''
    Command-line entry point: compute correlations between components
    and write the resulting matrices out as txt files.

    The first positional argument is the counts file, which is loaded with
    read_txt. The correlation, covariance and p-value matrices returned by
    basis_corr are written with write_txt; the covariance and p-value
    matrices are skipped when basis_corr returns None for them.
    The remaining options (iter, xiter, th) are forwarded to basis_corr as
    keyword arguments.

    Parameters
    ----------
    argv : list of str (default None)
        Command-line arguments, without the program name.
        If None, sys.argv[1:] is parsed (the optparse default).

    Raises
    ------
    SystemExit
        Raised by optparse on invalid options, on --help, or when the
        counts file argument is missing.
    '''
    ## parse input arguments
    from optparse import OptionParser
    usage = (
        'Compute the correlation between components (e.g. OTUs).\n'
        'By default uses the SparCC algorithm to account for compositional effects.\n'
        'Correlation and covariance (when applies) matrices are written out as txt files. \n'
        'Counts file needs to be a tab delimited text file where columns are samples and rows are components (e.g. OTUS).\n'
        ' See example/fake_data.txt for an example file.\n'
        '\n'
        'Usage:   python SparCC.py counts_file [options]\n'
        'Example: python SparCC.py example/fake_data.txt -i 20 --cor_file=example/basis_corr/cor_mat_sparcc.out'
    )
    parser = OptionParser(usage)
    parser.add_option("-c",
                      "--cor_file",
                      dest="cor_file",
                      type='str',
                      help="File to which correlation matrix will be written.")
    parser.add_option("-v",
                      "--cov_file",
                      dest="cov_file",
                      type='str',
                      help="File to which covariance matrix will be written.")
    parser.add_option(
        "-a",
        "--algo",
        dest="algo",
        default='SparCC',
        help=
        "Name of algorithm used to compute correlations (SparCC (default) | pearson | spearman | kendall)"
    )
    parser.add_option(
        "-i",
        "--iter",
        dest='iter',
        type='int',
        default=20,
        help="Number of inference iterations to average over (20 default).")
    parser.add_option(
        "-x",
        "--xiter",
        dest='xiter',
        type='int',
        default=10,
        help=
        "Number of exclusion iterations to remove strongly correlated pairs (10 default)."
    )
    # "--thershold" is the historical (misspelled) name; it is kept as an
    # alias so that existing command lines keep working.
    parser.add_option(
        "-t",
        "--threshold",
        "--thershold",
        dest='th',
        type='float',
        default=0.1,
        help="Correlation strength exclusion threshold (0.1 default).")
    parser.add_option("-p",
                      "--pval_file",
                      dest="pval_file",
                      type='str',
                      help="File to which pvalues will be written.")
    (options, args) = parser.parse_args(argv)
    # Report a missing counts file as a usage error instead of an IndexError.
    if not args:
        parser.error('missing required argument: counts_file')
    counts_file = args[0]

    # Project imports are deferred until the command line has been
    # validated, so usage errors and --help do not need them.
    from analysis_methods import basis_corr
    from io_methods import read_txt, write_txt

    # Work on a copy so that popping entries does not mutate `options`.
    kwargs = dict(vars(options))
    algo = kwargs.pop('algo')
    cor_file = kwargs.pop('cor_file')
    cov_file = kwargs.pop('cov_file')
    pval_file = kwargs.pop('pval_file')
    if cor_file is None: cor_file = 'cor_mat_' + algo + '.out'
    if cov_file is None: cov_file = 'cov_mat_' + algo + '.out'
    if pval_file is None: pval_file = 'pval_mat_' + algo + '.out'

    print('reading data')
    counts = read_txt(counts_file)

    ## Calculate correlations between components using the chosen algorithm
    print('computing correlations')
    # What is left in kwargs (iter, xiter, th) is passed through unchanged.
    cor, cov, pval = basis_corr(counts, method=algo, **kwargs)

    ## write out results
    print('writing results')
    write_txt(cor, cor_file)
    print('wrote ' + cor_file)
    if cov is not None:
        write_txt(cov, cov_file)
        print('wrote ' + cov_file)

    if pval is not None:
        write_txt(pval, pval_file)
        print('wrote ' + pval_file)