def make_bootstraps(counts, nperm, perm_template, outpath='./', iprint=0):
    '''
    Make nperm simulated datasets used to get pseudo p-values.

    Simulated datasets are generated by assigning each OTU in each sample an
    abundance that is randomly drawn (w. replacement) from the abundances of
    the OTU in all samples. Simulated datasets are written out as txt files.

    Parameters
    ----------
    counts : DataFrame
        Inferred correlations whose p-values are to be computed.
    nperm : int
        Number of permutations to produce.
    perm_template : str
        Template for the permuted data file names.
        Should not include the path, which is specified using the
        outpath parameter.
        The iteration number is indicated with a "#".
        For example: 'permuted/counts.permuted_#.txt'
    outpath : str (default './')
        The path to which permuted data will be written.
        If not provided files will be written to the cwd.
    iprint : int (default = 0)
        The interval at which iteration number is printed out.
        If iprint<=0 no printouts are made.
    '''
    if not os.path.exists(outpath):
        os.makedirs(outpath)
    for i in range(nperm):
        if iprint > 0 and not i % iprint:
            print(i)
        counts_perm = permute_w_replacement(counts)
        ## write out permuted counts.
        # os.path.join works whether or not outpath carries a trailing
        # separator (plain concatenation broke for e.g. outpath='out').
        outfile = os.path.join(outpath, perm_template.replace('#', '%d' % i))
        write_txt(counts_perm, outfile)
def main(cor_file, perm_template, nperm, test_type='two_sided', outfile=None):
    '''
    Compute pseudo p-vals from a set of correlations obtained from permuted data.

    Pseudo p-vals are the percentage of times a correlation at least as
    extreme as the "real" one was observed in simulated datasets.

    Files containing the permuted correlations should be named with a
    consistent template, and these file names cannot contain any "#"
    characters.
    '''
    observed = read_txt(cor_file)
    pvals = get_pvalues(observed, perm_template, nperm, test_type)
    # Default output name is derived from the input correlation file.
    destination = outfile if outfile is not None else cor_file + '.nperm_%d.pvals' % nperm
    write_txt(pvals, destination)
help="Correlation strength exclusion threshold (0.1 default).") (options, args) = parser.parse_args() counts_file = args[0] from analysis_methods import basis_corr from io_methods import read_txt, write_txt kwargs = options.__dict__ algo = kwargs.pop('algo') cor_file = kwargs.pop('cor_file') cov_file = kwargs.pop('cov_file') if cor_file is None: cor_file = 'cor_mat_' + algo + '.out' if cov_file is None: cov_file = 'cov_mat_' + algo + '.out' print 'reading data' counts = read_txt(counts_file) ## Calculate correlations between components using SparCC print 'computing correlations' cor, cov = basis_corr(counts, method=algo, **kwargs) ## write out results print 'writing results' write_txt(cor, cor_file) print 'wrote ' + cor_file if cov is not None: write_txt(cov, cov_file) print 'wrote ' + cov_file print 'Done!'
def driver():
    '''
    Command-line entry point: parse options, compute correlations between
    components (e.g. OTUs) from a counts file, and write the correlation,
    covariance, and p-value matrices out as txt files.
    '''
    ## parse input arguments
    from optparse import OptionParser
    usage = (
        'Compute the correlation between components (e.g. OTUs).\n'
        'By default uses the SparCC algorithm to account for compositional effects.\n'
        'Correlation and covariance (when applies) matrices are written out as txt files. \n'
        'Counts file needs to be a tab delimited text file where columns are samples and rows are components (e.g. OTUS).\n'
        ' See example/fake_data.txt for an example file.\n'
        '\n'
        'Usage: python SparCC.py counts_file [options]\n'
        'Example: python SparCC.py example/fake_data.txt -i 20 --cor_file=example/basis_corr/cor_mat_sparcc.out'
    )
    parser = OptionParser(usage)
    parser.add_option("-c", "--cor_file", dest="cor_file", type='str',
                      help="File to which correlation matrix will be written.")
    parser.add_option("-v", "--cov_file", dest="cov_file", type='str',
                      help="File to which covariance matrix will be written.")
    parser.add_option("-a", "--algo", dest="algo", default='SparCC',
                      help="Name of algorithm used to compute correlations (SparCC (default) | pearson | spearman | kendall)")
    parser.add_option("-i", "--iter", dest='iter', type='int', default=20,
                      help="Number of inference iterations to average over (20 default).")
    parser.add_option("-x", "--xiter", dest='xiter', type='int', default=10,
                      help="Number of exclusion iterations to remove strongly correlated pairs (10 default).")
    # NOTE(review): the long option name is misspelled ("--thershold") but is
    # kept unchanged for backward CLI compatibility; the short "-t" works too.
    parser.add_option("-t", "--thershold", dest='th', type='float', default=0.1,
                      help="Correlation strength exclusion threshold (0.1 default).")
    parser.add_option("-p", "--pval_file", dest="pval_file", type='str',
                      help="File to which pvalues will be written.")
    (options, args) = parser.parse_args()
    # Fail with a clear message instead of an IndexError when the required
    # positional counts file is missing.
    if not args:
        parser.error('counts_file is a required argument')
    counts_file = args[0]

    from analysis_methods import basis_corr
    from io_methods import read_txt, write_txt

    # Remaining option values (iter, xiter, th) are forwarded to basis_corr.
    kwargs = options.__dict__
    algo = kwargs.pop('algo')
    cor_file = kwargs.pop('cor_file')
    cov_file = kwargs.pop('cov_file')
    pval_file = kwargs.pop('pval_file')
    if cor_file is None:
        cor_file = 'cor_mat_' + algo + '.out'
    if cov_file is None:
        cov_file = 'cov_mat_' + algo + '.out'
    if pval_file is None:
        pval_file = 'pval_mat_' + algo + '.out'

    print('reading data')
    counts = read_txt(counts_file)

    ## Calculate correlations between components using SparCC
    print('computing correlations')
    cor, cov, pval = basis_corr(counts, method=algo, **kwargs)

    ## write out results
    print('writing results')
    write_txt(cor, cor_file)
    print('wrote ' + cor_file)
    if cov is not None:
        write_txt(cov, cov_file)
        print('wrote ' + cov_file)
    if pval is not None:
        write_txt(pval, pval_file)
        print('wrote ' + pval_file)