def probesetfreeze_list(inbredsetid, dir):
    """Process every probe-set freeze that belongs to one inbred set.

    The strains of the inbred set are fetched once and printed, the number of
    freezes found is reported, and then each freeze is printed and passed to
    ``probesetfreeze_item`` together with the strains and ``dir``.

    Args:
        inbredsetid: Identifier passed to the strain and freeze lookups.
        dir: Forwarded unchanged to ``probesetfreeze_item``.
    """
    strain_rows = datastructure.get_strains(inbredsetid)
    print(strain_rows)

    freezes = datastructure.get_probesetfreezes(inbredsetid)
    print("From DB, get %d probesetfreezes" % len(freezes))

    for freeze in freezes:
        print(freeze)
        probesetfreeze_item(strain_rows, dir, freeze)
def output_pheno(inbredsetid, file):
    """Write a tab-separated phenotype-by-strain table for one inbred set.

    The header row holds ``PhenotypeID``, ``PhenotypeName`` and one column per
    strain (element 1 of each row from ``datastructure.get_strains``).  Every
    following row describes one entry of ``phenotypes.get_publishxrefs``:
    its element 0, a name built from the first three phenotype fields joined
    with ``;`` (whitespace runs collapsed to one space, ends stripped), and
    one value per strain.  Strains without a value get ``x``.
    Cross-references that have no data rows at all are skipped.  Every cell,
    including the last one of a line, is followed by a tab character.

    Args:
        inbredsetid: Identifier passed to the strain and cross-reference
            lookups.
        file: Path of the output file; it is created or truncated.
    """
    whitespace_run = re.compile(r'\s+')

    # The context manager closes the file even if a lookup raises midway.
    with open(file, 'w') as out:
        strains = datastructure.get_strains(inbredsetid)
        print("get %d strains" % (len(strains)))
        print("strains: %s" % str(strains))

        publishxrefs = phenotypes.get_publishxrefs(inbredsetid)
        print("get %d publishxrefs" % (len(publishxrefs)))

        out.write("%s\t" % "PhenotypeID")
        out.write("%s\t" % "PhenotypeName")
        out.writelines("%s\t" % strain[1] for strain in strains)
        out.write("\n")
        out.flush()

        # Strain names are matched case-insensitively; lower-case them once
        # instead of once per output row.
        strain_keys = [strain[1].lower() for strain in strains]

        for publishxref in publishxrefs:
            publishxrefid = publishxref[0]
            phenotype = phenotypes.get_phenotype(publishxref[1])
            # NOTE(review): the publication is looked up but never written.
            # The call is kept because the helper is not visible from here;
            # confirm it can be dropped.
            phenotypes.get_publication(publishxref[2])

            # Transpose the data rows into columns.  list() keeps the
            # emptiness check and the indexing valid when zip is lazy.
            columns = list(zip(*phenotypes.get_publishdata(publishxref[3])))
            if not columns:
                continue
            values = utilities.to_dic(
                [name.lower() for name in columns[1]], columns[2])

            out.write("%s\t" % publishxrefid)
            phenotypename = "%s;%s;%s" % (
                phenotype[0], phenotype[1], phenotype[2])
            phenotypename = whitespace_run.sub(' ', phenotypename).strip()
            out.write("%s\t" % phenotypename)

            out.writelines(
                "%s\t" % (values[key] if key in values else 'x')
                for key in strain_keys)
            out.write("\n")
            # One flush per row keeps progress visible during long exports.
            out.flush()
def _generate_probesets_rows(outputfile, probesetxrefs, strain_keys):
    """Write one table row per probe-set cross-reference.

    Each row holds element 0 of the cross-reference, element 1 of the
    matching ``probesets.get_probeset`` record, and one value per entry of
    ``strain_keys`` (``x`` where no value exists).  Every cell is followed by
    a tab character.
    """
    for probesetxref in probesetxrefs:
        probesetid = probesetxref[0]
        probeset = probesets.get_probeset(probesetid)
        probesetname = probeset[1]

        # Transpose data rows into columns.  An empty result used to fail
        # with IndexError on columns[1]; it now yields a row of 'x' cells.
        columns = list(zip(*probesets.get_probesetdata(probesetxref[1])))
        if columns:
            values = utilities.to_dic(
                [name.lower() for name in columns[1]], columns[2])
        else:
            values = {}

        outputfile.write("%s\t" % probesetid)
        outputfile.write("%s\t" % probesetname)
        outputfile.writelines(
            "%s\t" % (values[key] if key in values else 'x')
            for key in strain_keys)
        outputfile.write("\n")
        # One flush per row keeps progress visible during long exports.
        outputfile.flush()


def generate_probesets(probesetfreezesfile, outputdir):
    """Export one tab-separated probe-set table per listed freeze.

    The first whitespace-separated token of every non-blank line of
    ``probesetfreezesfile`` is passed to ``datastructure.get_probesetfreeze``.
    For each freeze a file ``<outputdir>/<id>_<name>.txt`` is written whose
    header is ``ProbeSet Id``, ``ProbeSet Name`` and the upper-cased strain
    names, followed by one row per probe-set cross-reference.

    Args:
        probesetfreezesfile: Path of the text file listing the freezes.
        outputdir: Directory that receives the generated files; it must
            already exist.
    """
    # Context managers close both files even if a lookup raises midway.
    with open(probesetfreezesfile, 'r') as listing:
        for line in listing:
            cells = line.split()
            if not cells:
                # A blank line used to fail with IndexError on cells[0].
                continue

            probesetfreeze = datastructure.get_probesetfreeze(cells[0])
            probesetfreezeid = probesetfreeze[0]
            probesetfreezename = probesetfreeze[1]
            inbredset = datastructure.get_inbredset(probesetfreezeid)
            strains = datastructure.get_strains(inbredset[0])
            # Strain names are matched case-insensitively; lower-case once.
            strain_keys = [strain[1].lower() for strain in strains]

            path = "%s/%d_%s.txt" % (
                outputdir, probesetfreezeid, probesetfreezename)
            with open(path, "w+") as outputfile:
                outputfile.write("%s\t" % "ProbeSet Id")
                outputfile.write("%s\t" % "ProbeSet Name")
                outputfile.write(
                    '\t'.join([strain[1].upper() for strain in strains]))
                outputfile.write("\n")
                outputfile.flush()

                probesetxrefs = probesets.get_probesetxref(probesetfreezeid)
                print(probesetfreeze)
                print(len(probesetxrefs))
                _generate_probesets_rows(
                    outputfile, probesetxrefs, strain_keys)
# Both statements take (dataset id, strain id) as DB-API parameters and count
# the non-NULL data values stored for that pair.
_TRAVERSE_PHENOTYPE_COUNT_SQL = (
    " SELECT COUNT(PublishData.Id)"
    " FROM PublishXRef,PublishData"
    " WHERE PublishXRef.InbredSetId=%s"
    " AND PublishXRef.DataId=PublishData.Id"
    " AND PublishData.StrainId=%s"
    " AND PublishData.value IS NOT NULL "
)
_TRAVERSE_PROBESET_COUNT_SQL = (
    " SELECT COUNT(ProbeSetData.`Id`)"
    " FROM ProbeSetXRef,ProbeSetData"
    " WHERE ProbeSetXRef.`ProbeSetFreezeId`=%s"
    " AND ProbeSetXRef.`DataId`=ProbeSetData.`Id`"
    " AND ProbeSetData.`StrainId`=%s"
    " AND ProbeSetData.`value` IS NOT NULL "
)


def _traverse_write_counts(out, cursor, sql, dataset_id, strains, totals):
    """Finish one table row with a count cell per strain.

    Runs ``sql`` with ``(dataset_id, strain[0])`` for every strain, writes
    each count followed by a tab, adds it to the matching slot of ``totals``
    (modified in place), then ends the line and flushes.
    """
    for index, strain in enumerate(strains):
        cursor.execute(sql, (dataset_id, strain[0]))
        count = cursor.fetchone()[0]
        out.write("%d\t" % count)
        totals[index] += count
    out.write("\n")
    # Flushed once per row so progress stays visible during slow counts.
    out.flush()


def traverse(outputfile):
    """Write a per-strain table of non-NULL value counts for inbred set 1.

    Columns are ``DatasetID``, ``DatasetName``, ``RecordNumber`` and one
    column per strain.  The first data row covers the phenotypes of the
    inbred set, then one row follows per probe-set freeze, and a final
    ``Sum`` row holds the per-strain column totals.  Every cell, including
    the last one of a line, is followed by a tab character.

    Args:
        outputfile: Path of the output file; it is created or truncated.
    """
    inbredsetid = 1

    # The context manager closes the file even if a query raises midway.
    with open(outputfile, 'w') as out:
        strains = datastructure.get_strains(inbredsetid)
        print("strains: %s" % len(strains))
        totals = [0] * len(strains)
        probesetfreezes = datastructure.get_probesetfreezes(inbredsetid)
        print("probesetfreezes: %s" % len(probesetfreezes))

        cursor, con = utilities.get_cursor()
        try:
            out.write("DatasetID\t")
            out.write("DatasetName\t")
            out.write("RecordNumber\t")
            out.writelines("%s\t" % strain[1] for strain in strains)
            out.write("\n")
            out.flush()

            # Phenotype row.
            publishxrefs = phenotypes.get_publishxrefs(inbredsetid)
            out.write("-\t")
            out.write("%s\t" % "Phenotypes")
            out.write("%d\t" % len(publishxrefs))
            _traverse_write_counts(
                out, cursor, _TRAVERSE_PHENOTYPE_COUNT_SQL,
                inbredsetid, strains, totals)

            # One row per probe-set freeze.
            for probesetfreeze in probesetfreezes:
                probesetfreezeid = probesetfreeze[0]
                probesetfreezefullname = probesetfreeze[2]
                probesetxrefs = probesets.get_probesetxref(probesetfreezeid)

                out.write("%d\t" % probesetfreezeid)
                out.write("%s\t" % probesetfreezefullname)
                out.write("%d\t" % len(probesetxrefs))
                _traverse_write_counts(
                    out, cursor, _TRAVERSE_PROBESET_COUNT_SQL,
                    probesetfreezeid, strains, totals)

            # Column totals.
            out.write("-\t")
            out.write("%s\t" % "Sum")
            out.write("-\t")
            out.writelines("%d\t" % total for total in totals)
            out.write("\n")
            out.flush()
        finally:
            # Release the database connection on failure as well.
            con.close()
def _bxd_strain_values(rows):
    """Index data rows by lower-cased strain name.

    Element 1 of each row is lower-cased and used as the key, element 2 as
    the value; the pairing itself is done by ``utilities.to_dic``.  An empty
    ``rows`` yields an empty dict.
    """
    # list() keeps the emptiness check and the indexing valid when zip is
    # lazy.
    columns = list(zip(*rows))
    if not columns:
        return {}
    return utilities.to_dic([name.lower() for name in columns[1]], columns[2])


def bxd_pheno(file):
    """Write phenotype value, N and SE per strain for inbred set 1.

    The header row holds ``PhenotypeID``, ``PhenotypeName`` and, for every
    strain, the three columns ``<strain>-expression``, ``<strain>-N`` and
    ``<strain>-SE``.  Each following row describes one entry of
    ``phenotypes.get_publishxrefs``: its element 0, a name built from the
    first three phenotype fields joined with ``;`` (whitespace runs collapsed
    to one space), and the three cells per strain.  A missing value is
    written as ``x``.  Every cell, including the last one of a line, is
    followed by a tab character.

    Args:
        file: Path of the output file; it is created or truncated.
    """
    inbredsetid = 1
    whitespace_run = re.compile(r'\s+')

    # The context manager closes the file even if a lookup raises midway.
    with open(file, 'w') as out:
        strains = datastructure.get_strains(inbredsetid)
        print("get %d strains" % (len(strains)))

        publishxrefs = phenotypes.get_publishxrefs(inbredsetid)
        print("get %d publishxrefs" % (len(publishxrefs)))

        out.write("%s\t" % "PhenotypeID")
        out.write("%s\t" % "PhenotypeName")
        for strain in strains:
            strainname = strain[1]
            out.write("%s-expression\t" % strainname)
            out.write("%s-N\t" % strainname)
            out.write("%s-SE\t" % strainname)
        out.write("\n")
        out.flush()

        # Strain names are matched case-insensitively; lower-case them once
        # instead of once per output row.
        strain_keys = [strain[1].lower() for strain in strains]

        for publishxref in publishxrefs:
            publishxrefid = publishxref[0]
            publishdataid = publishxref[3]

            phenotype = phenotypes.get_phenotype(publishxref[1])
            # NOTE(review): the publication is looked up but never written.
            # The call is kept because the helper is not visible from here;
            # confirm it can be dropped.
            phenotypes.get_publication(publishxref[2])

            lookups = (
                _bxd_strain_values(phenotypes.get_publishdata(publishdataid)),
                _bxd_strain_values(phenotypes.get_publishdatan(publishdataid)),
                _bxd_strain_values(phenotypes.get_publishdatase(publishdataid)),
            )

            out.write("%s\t" % publishxrefid)
            phenotypename = "%s;%s;%s" % (
                phenotype[0], phenotype[1], phenotype[2])
            phenotypename = whitespace_run.sub(' ', phenotypename)
            out.write("%s\t" % phenotypename)

            # Cell order per strain follows the header: value, N, SE.
            for key in strain_keys:
                for lookup in lookups:
                    value = lookup[key] if key in lookup else 'x'
                    out.write("%s\t" % value)
            out.write("\n")
            # One flush per row keeps progress visible during long exports.
            out.flush()