def correlations(outputdir, genos, probesetfreeze):
    """Write genotype-vs-probeset correlations for one probeset freeze as TSV.

    The output file is ``<outputdir>/<id>_<name>.txt`` and holds one header
    row followed by one row per (probeset, geno) pair.

    Args:
        outputdir: Directory the output file is created in.
        genos: Iterable of mappings providing at least the keys ``'locus'``
            and ``'dicvalues'``.
        probesetfreeze: Sequence whose element 0 is the freeze id (formatted
            with ``%d``) and element 1 is the freeze name.

    Notes:
        ``calculate.correlation`` is presumably returning
        ``((pearson_r, pearson_p), (spearman_r, spearman_p))`` -- that is how
        the result is mapped onto the column headers; confirm against the
        helper.
    """
    print(probesetfreeze)
    probesetfreezeid = probesetfreeze[0]
    probesetfreezename = probesetfreeze[1]

    def format_row(fields):
        # Every field, including the last one, is followed by a tab.
        return "".join("%s\t" % (field,) for field in fields) + "\n"

    outputpath = "%s/%d_%s.txt" % (outputdir, probesetfreezeid, probesetfreezename)
    # The context manager closes the file even when a lookup or the
    # correlation computation raises part-way through.
    with open(outputpath, "w+") as outputfile:
        outputfile.write(format_row((
            "ProbeSet Id",
            "ProbeSet Name",
            "Geno Name",
            "Overlap Number",
            "Pearson r",
            "Pearson p",
            "Spearman r",
            "Spearman p",
        )))
        outputfile.flush()

        probesetxrefs = probesets.get_probesetxref(probesetfreezeid)
        print("Get %d probesetxrefs" % (len(probesetxrefs)))

        for probesetxref in probesetxrefs:
            probesetid = probesetxref[0]
            probesetdataid = probesetxref[1]
            probeset = probesets.get_probeset(probesetid)
            probesetname = probeset[1]

            # Transpose rows into columns; list() keeps the result indexable
            # where zip() returns an iterator.
            columns = list(zip(*probesets.get_probesetdata(probesetdataid)))
            if len(columns) < 3:
                # No usable data (e.g. no rows at all): skip instead of
                # failing with IndexError on columns[1] / columns[2].
                print("probesetdata - %s: %d" % (probesetid, len(columns)))
                continue
            # Column 1 holds the strain names (lower-cased to serve as keys),
            # column 2 the matching values.
            probesetdata = utilities.to_dic(
                [strain.lower() for strain in columns[1]], columns[2])

            for geno in genos:
                keys, values1, values2 = utilities.overlap(
                    geno['dicvalues'], probesetdata)
                rs = calculate.correlation(values1, values2)
                # Emit the row in one write so a failure above never leaves
                # a truncated row in the file.
                outputfile.write(format_row((
                    probesetid,
                    probesetname,
                    geno['locus'],
                    len(keys),
                    rs[0][0],
                    rs[0][1],
                    rs[1][0],
                    rs[1][1],
                )))
                # Flush per row so progress is visible while the job runs.
                outputfile.flush()
def bxd_geno_pheno_correlations(file, inbredsetid=1,
                                genofile="/home/leiyan/gn/web/genotypes/BXD.geno"):
    """Write genotype-vs-phenotype correlations to a TSV file.

    One header row is written, followed by one row per (publishxref, geno)
    pair. Publish records whose data does not transpose to exactly three
    columns are reported on stdout and skipped.

    Args:
        file: Path of the output file; it is created or truncated.
        inbredsetid: Id passed to ``phenotypes.get_publishxrefs``.
            Defaults to 1, the value that was previously hard-coded.
        genofile: Path of the geno file passed to ``genotypes.load_genos``.
            Defaults to the previously hard-coded BXD geno file.

    Notes:
        ``calculate.correlation`` is presumably returning
        ``((pearson_r, pearson_p), (spearman_r, spearman_p))`` -- that is how
        the result is mapped onto the column headers; confirm against the
        helper.
    """

    def format_row(fields):
        # Every field, including the last one, is followed by a tab.
        return "".join("%s\t" % (field,) for field in fields) + "\n"

    # The context manager closes the file even when a lookup or the
    # correlation computation raises part-way through.
    with open(file, 'w') as outputfile:
        # load_genos yields the strain list at index 0 and the genos at index 1.
        t = genotypes.load_genos(genofile)
        genostrains = t[0]
        genos = t[1]
        print("From geno file, get %d strains" % (len(genostrains)))
        print("From geno file, get %d genos" % (len(genos)))

        publishxrefs = phenotypes.get_publishxrefs(inbredsetid)
        print("get %d publishxrefs" % (len(publishxrefs)))

        outputfile.write(format_row((
            "PhenotypeID",
            "PhenotypeName",
            "MarkerName",
            "MarkerChromosome",
            "MarkerCentimorgan",
            "MarkerMb",
            "PearsonCorrelation",
            "PearsonPvalue",
            "SpearmanCorrelation",
            "SpearmanPvalue",
            "Number_of_BXDs_used",
        )))
        outputfile.flush()

        for publishxref in publishxrefs:
            publishxrefid = publishxref[0]
            phenotype = phenotypes.get_phenotype(publishxref[1])
            # NOTE(review): the publication record is never used below; the
            # lookup is kept only in case it has side effects -- confirm and
            # drop it if it is a pure read.
            phenotypes.get_publication(publishxref[2])

            # Transpose rows into columns; list() keeps the result sized and
            # indexable where zip() returns an iterator.
            columns = list(zip(*phenotypes.get_publishdata(publishxref[3])))
            if len(columns) != 3:
                print("publishdata - %s: %d" % (publishxrefid, len(columns)))
                continue
            # Column 1 holds the strain names (lower-cased to serve as keys),
            # column 2 the matching values.
            publishdata = utilities.to_dic(
                [strain.lower() for strain in columns[1]], columns[2])

            # The phenotype label does not depend on the geno, so build it
            # once per publish record.
            phenotypename = "%s;%s;%s" % (phenotype[0], phenotype[1], phenotype[2])

            for geno in genos:
                keys, values1, values2 = utilities.overlap(
                    geno['dicvalues'], publishdata)
                rs = calculate.correlation(values1, values2)
                outputfile.write(format_row((
                    publishxrefid,
                    phenotypename,
                    geno['locus'],
                    geno['chr'],
                    geno['cm'],
                    geno['mb'],
                    rs[0][0],
                    rs[0][1],
                    rs[1][0],
                    rs[1][1],
                    len(keys),
                )))
                # Flush per row so progress is visible while the job runs.
                outputfile.flush()