Example #1
def main(cor_file, perm_template, nperm, test_type='two_sided', outfile=None):
    '''
    Compute pseudo p-values from a set of correlations obtained from permuted data.
    Pseudo p-values are the fraction of times a correlation at least
    as extreme as the "real" one was observed in the simulated datasets.
    
    Files containing the permuted correlations should be named using a
    consistent template in which the iteration number is marked with a "#";
    apart from this placeholder, the file names should not contain "#" characters.
    '''
    cor = read_txt(cor_file)
    p_vals = get_pvalues(cor, perm_template, nperm, test_type)
    if outfile is None:
        outfile = cor_file + '.nperm_%d.pvals' % nperm
    write_txt(p_vals, outfile)
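A minimal usage sketch for this entry point. The paths below are placeholders, and it is assumed that the permuted correlation files (e.g. 'example/perm/cor.sparcc.permuted_0.txt' through '..._99.txt') have already been computed.

# Hypothetical invocation of Example #1's main(); paths are placeholders.
main('example/basis_corr/cor_mat_sparcc.out',
     'example/perm/cor.sparcc.permuted_#.txt',
     nperm=100,
     test_type='two_sided')
# With outfile=None, the p-values are written to
# 'example/basis_corr/cor_mat_sparcc.out.nperm_100.pvals'.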
Example #2
def main(counts_file, nperm, perm_template, outpath='./'):
    '''
    Make n simulated datasets used to get pseudo p-values.
    Simulated datasets are generated by assigning each OTU in each sample
    an abundance that is randomly drawn (with replacement) from the
    abundances of that OTU across all samples.
    Simulated datasets are written out as txt files.
    '''
    if perm_template is None:
        perm_template = counts_file + '.permuted_#.txt'
    ## read counts data
    counts = read_txt(counts_file)
    ## make permuted data
    make_bootstraps(counts, nperm, perm_template, outpath=outpath)
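A usage sketch for the bootstrap driver above; the counts file path is a placeholder, and read_txt/make_bootstraps are assumed to be the package helpers used throughout these examples.

# Hypothetical invocation of Example #2's main(); paths are placeholders.
main('example/fake_data.txt',
     nperm=100,
     perm_template=None,   # falls back to 'example/fake_data.txt.permuted_#.txt'
     outpath='example/perm/')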
Example #3
def get_pvalues(cor, perm_template, nperm, test_type='two_sided', iprint=0):
    '''
    Compute pseudo p-values from a set of correlations obtained from permuted data.
    Pseudo p-values are the fraction of times a correlation at least
    as extreme as the "real" one was observed in the simulated datasets.
    
    Files containing the permuted correlations should be named using a
    consistent template in which the iteration number is marked with a "#";
    apart from this placeholder, the file names should not contain "#" characters.

    Parameters
    ----------
    cor : DataFrame
        Inferred correlations whose p-values are to be computed.
    perm_template : str
        The template used for naming the correlation files of the 
        permuted data. The iteration number is indicated with a "#".
        For example: 'permuted/cor.sparcc.permuted_#.txt'
    nperm : int
        Number of permutations available.
    test_type : 'two_sided' (default) | 'one_sided'
        'two_sided' = consider only the magnitude of the correlation.
        'one_sided' = also take the sign of the correlation into account.
    iprint : int (default = 0)
        The interval at which iteration number is printed out.
        If iprint<=0 no printouts are made.
    
    Returns
    -------
    p_vals : DataFrame
        Computed pseudo p-values.
    '''
    if test_type == 'two_sided':
        cmpfun = compare2sided
    elif test_type == 'one_sided':
        cmpfun = compare1sided
    else:
        raise ValueError('unsupported test type "%s"' % test_type)
    n_sig = DF(np.zeros(cor.shape), index=cor.index, columns=cor.columns)
    for i in range(nperm):
        if iprint > 0 and not i % iprint:
            print(i)
        permfile = perm_template.replace('#', '%d' % i)
        cor_perm = read_txt(permfile).values
        n_sig[cmpfun(cor_perm, cor)] += 1
    p_vals = 1. * n_sig / nperm
    p_vals.values[np.diag_indices_from(p_vals.values)] = 1
    return p_vals
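The comparison helpers compare2sided and compare1sided are not shown in this example. A plausible sketch, assuming the two-sided test compares correlation magnitudes only and the one-sided test additionally requires the permuted correlation to have the same sign as the observed one:

import numpy as np

# Assumed implementations of the comparison helpers used by get_pvalues.
def compare2sided(perm, real):
    # "At least as extreme" judged by magnitude alone.
    return np.abs(perm) >= np.abs(real)

def compare1sided(perm, real):
    # Magnitude criterion plus agreement in sign with the observed correlation.
    return (np.abs(perm) >= np.abs(real)) & (np.sign(perm) == np.sign(real))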
        help="Correlation strength exclusion threshold (0.1 default).")
    (options, args) = parser.parse_args()
    counts_file = args[0]

    from analysis_methods import basis_corr
    from io_methods import read_txt, write_txt

    kwargs = options.__dict__
    algo = kwargs.pop('algo')
    cor_file = kwargs.pop('cor_file')
    cov_file = kwargs.pop('cov_file')
    if cor_file is None: cor_file = 'cor_mat_' + algo + '.out'
    if cov_file is None: cov_file = 'cov_mat_' + algo + '.out'

    print('reading data')
    counts = read_txt(counts_file)

    ## Calculate correlations between components using SparCC
    print('computing correlations')
    cor, cov = basis_corr(counts, method=algo, **kwargs)

    ## write out results
    print('writing results')
    write_txt(cor, cor_file)
    print('wrote ' + cor_file)
    if cov is not None:
        write_txt(cov, cov_file)
        print('wrote ' + cov_file)

    print('Done!')
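For readers who want to skip the command-line plumbing, the body of this example reduces to a direct call along the following lines. The keyword names mirror the OptionParser destinations defined in Example #5 (iter, xiter, th) and are therefore assumptions about basis_corr's interface; the paths are placeholders.

# Programmatic equivalent of the CLI driver above (sketch, not the packaged script).
from io_methods import read_txt, write_txt
from analysis_methods import basis_corr

counts = read_txt('example/fake_data.txt')                       # placeholder path
cor, cov = basis_corr(counts, method='SparCC', iter=20, xiter=10, th=0.1)
write_txt(cor, 'cor_mat_SparCC.out')
if cov is not None:
    write_txt(cov, 'cov_mat_SparCC.out')
print('Done!')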
Example #5
def driver():
    ## parse input arguments
    from optparse import OptionParser
    kwargs = {}
    usage = (
        'Compute the correlation between components (e.g. OTUs).\n'
        'By default uses the SparCC algorithm to account for compositional effects.\n'
        'Correlation and covariance (when applicable) matrices are written out as txt files.\n'
        'Counts file needs to be a tab-delimited text file where columns are samples and rows are components (e.g. OTUs).\n'
        'See example/fake_data.txt for an example file.\n'
        '\n'
        'Usage:   python SparCC.py counts_file [options]\n'
        'Example: python SparCC.py example/fake_data.txt -i 20 --cor_file=example/basis_corr/cor_mat_sparcc.out'
    )
    parser = OptionParser(usage)
    parser.add_option("-c",
                      "--cor_file",
                      dest="cor_file",
                      type='str',
                      help="File to which correlation matrix will be written.")
    parser.add_option("-v",
                      "--cov_file",
                      dest="cov_file",
                      type='str',
                      help="File to which covariance matrix will be written.")
    parser.add_option(
        "-a",
        "--algo",
        dest="algo",
        default='SparCC',
        help=
        "Name of algorithm used to compute correlations (SparCC (default) | pearson | spearman | kendall)"
    )
    parser.add_option(
        "-i",
        "--iter",
        dest='iter',
        type='int',
        default=20,
        help="Number of inference iterations to average over (20 default).")
    parser.add_option(
        "-x",
        "--xiter",
        dest='xiter',
        type='int',
        default=10,
        help=
        "Number of exclusion iterations to remove strongly correlated pairs (10 default)."
    )
    parser.add_option(
        "-t",
        "--thershold",
        dest='th',
        type='float',
        default=0.1,
        help="Correlation strength exclusion threshold (0.1 default).")
    parser.add_option("-p",
                      "--pval_file",
                      dest="pval_file",
                      type='str',
                      help="File to which pvalues will be written.")
    (options, args) = parser.parse_args()
    #print "OPTIONS: ", options
    #print "ARGS: ", args
    counts_file = args[0]

    from analysis_methods import basis_corr
    from io_methods import read_txt, write_txt

    kwargs = options.__dict__
    algo = kwargs.pop('algo')
    cor_file = kwargs.pop('cor_file')
    cov_file = kwargs.pop('cov_file')
    pval_file = kwargs.pop('pval_file')
    if cor_file is None: cor_file = 'cor_mat_' + algo + '.out'
    if cov_file is None: cov_file = 'cov_mat_' + algo + '.out'
    if pval_file is None: pval_file = 'pval_mat_' + algo + '.out'

    print('reading data')
    counts = read_txt(counts_file)

    ## Calculate correlations between components using SparCC
    print('computing correlations')
    cor, cov, pval = basis_corr(counts, method=algo, **kwargs)
    ## write out results
    print('writing results')
    write_txt(cor, cor_file)
    print('wrote ' + cor_file)
    if cov is not None:
        write_txt(cov, cov_file)
        print('wrote ' + cov_file)

    if pval is not None:
        write_txt(pval, pval_file)
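Taken together, Examples #1-#5 form a small pipeline: infer correlations from the real counts, generate bootstrapped counts tables, re-infer correlations for each bootstrap, and summarize the comparison as pseudo p-values. The sketch below strings these steps together; the paths are placeholders, basis_corr is unpacked as in Example #4 (two return values), and the modules from which make_bootstraps and get_pvalues are imported are assumptions (read_txt/write_txt/basis_corr follow the imports used in Examples #4 and #5).

# End-to-end sketch of the workflow built from the examples above.
from io_methods import read_txt, write_txt
from analysis_methods import basis_corr, make_bootstraps, get_pvalues   # assumed locations

nperm = 100
counts = read_txt('example/fake_data.txt')

## 1. correlations inferred from the real data
cor, cov = basis_corr(counts, method='SparCC')
write_txt(cor, 'cor_mat_sparcc.out')

## 2. bootstrapped counts tables (Example #2)
make_bootstraps(counts, nperm, 'fake_data.permuted_#.txt', outpath='perm/')

## 3. correlations re-inferred for every bootstrapped table
for i in range(nperm):
    perm_counts = read_txt('perm/fake_data.permuted_%d.txt' % i)
    perm_cor, _ = basis_corr(perm_counts, method='SparCC')
    write_txt(perm_cor, 'perm/cor.sparcc.permuted_%d.txt' % i)

## 4. pseudo p-values (Examples #1 and #3)
p_vals = get_pvalues(cor, 'perm/cor.sparcc.permuted_#.txt', nperm, test_type='two_sided')
write_txt(p_vals, 'pvals.two_sided.txt')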