Ejemplo n.º 1
0
def get_normalized_probeset(locus, inbredsetid):
    """Return one flat row per probeset cross-reference found for a locus.

    The cross-references come from
    ``get_probesetxref_inbredsetid(locus, inbredsetid)``.  For each of them
    the related records are looked up and a selection of their fields is
    concatenated into a single list, in this order:

    * fields 0-2 of the probeset-freeze record (looked up via xref field 4),
    * fields 1-6 of the probeset record (looked up via xref field 0),
    * fields 1-2 of the cross-reference itself,
    * fields 2-3 of the geno record (looked up by the name in xref field 3).

    Returns:
        A list of such rows (each a list of 13 values); empty when there
        are no cross-references.
    """
    rows = []
    for xref in get_probesetxref_inbredsetid(locus, inbredsetid):
        # Probeset-freeze part.
        freeze = datastructure.get_probesetfreeze(xref[4])
        row = [freeze[0], freeze[1], freeze[2]]
        # Probeset part.
        probeset = get_probeset(xref[0])
        row += [
            probeset[1],
            probeset[2],
            probeset[3],
            probeset[4],
            probeset[5],
            probeset[6],
        ]
        # Values carried directly on the cross-reference.
        row += [xref[1], xref[2]]
        # Geno part: the marker name is taken from the cross-reference
        # itself, not from the ``locus`` argument.
        marker = xref[3]
        geno = genotypes.get_geno(inbredsetid=inbredsetid, name=marker)
        row += [geno[2], geno[3]]
        rows.append(row)
    return rows
Ejemplo n.º 2
0
def bxd_givenprobesetfreezes(probesetfreezesfile):
    """Print a one-line summary for each probeset freeze listed in a file.

    Args:
        probesetfreezesfile: Path to a text file.  The first
            whitespace-separated token of every non-blank line is used as
            a probeset-freeze id; anything after it is ignored.

    For every id a tab-separated line is printed to stdout containing the
    id as read from the file, fields 1 and 2 of the probeset-freeze record
    (used here as name and full name) and the number of cross-references
    returned by ``probesets.get_probesetxref``.
    """
    # ``with`` guarantees the input file is closed even when a lookup raises.
    with open(probesetfreezesfile, 'r') as infile:
        for line in infile:
            cells = line.split()
            if not cells:
                # Blank line (e.g. a trailing newline): nothing to look up.
                continue
            probesetfreezeid = cells[0]
            probesetfreeze = datastructure.get_probesetfreeze(probesetfreezeid)
            probesetfreezename = probesetfreeze[1]
            probesetfreezefullname = probesetfreeze[2]
            probesetxrefs = probesets.get_probesetxref(probesetfreezeid)
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print("%s\t%s\t%s\t%d" % (
                probesetfreezeid,
                probesetfreezename,
                probesetfreezefullname,
                len(probesetxrefs),
            ))
Ejemplo n.º 3
0
def generate_probesets(probesetfreezesfile, outputdir):
    """Write one strain-by-probeset value table per listed probeset freeze.

    Args:
        probesetfreezesfile: Path to a text file.  The first
            whitespace-separated token of every non-blank line is used as
            a probeset-freeze id.
        outputdir: Directory in which the output files are created.

    For every freeze a file ``<outputdir>/<id>_<name>.txt`` is written.
    Its header row holds "ProbeSet Id", "ProbeSet Name" and the upper-cased
    strain names; each following row holds the probeset id, the probeset
    name and one value per strain, with ``x`` where the probeset has no
    value for that strain.  Strain names are matched case-insensitively.
    The freeze record and the number of cross-references are printed to
    stdout as progress output.
    """
    # ``with`` closes both files even when a database lookup raises.
    with open(probesetfreezesfile, 'r') as infile:
        for line in infile:
            cells = line.split()
            if not cells:
                # Blank line (e.g. a trailing newline): nothing to look up.
                continue
            probesetfreeze = datastructure.get_probesetfreeze(cells[0])
            probesetfreezeid = probesetfreeze[0]
            probesetfreezename = probesetfreeze[1]
            inbredset = datastructure.get_inbredset(probesetfreezeid)
            inbredsetid = inbredset[0]
            strains = datastructure.get_strains(inbredsetid)
            # Lower-cased once here instead of once per output row.
            strainnames = [strain[1].lower() for strain in strains]
            #
            outputpath = "%s/%d_%s.txt" % (
                outputdir, probesetfreezeid, probesetfreezename)
            with open(outputpath, "w+") as outputfile:
                outputfile.write(
                    "ProbeSet Id\tProbeSet Name\t"
                    + '\t'.join([strain[1].upper() for strain in strains])
                    + "\n")
                outputfile.flush()
                #
                probesetxrefs = probesets.get_probesetxref(probesetfreezeid)
                print(probesetfreeze)
                print(len(probesetxrefs))
                for probesetxref in probesetxrefs:
                    probesetid = probesetxref[0]
                    probesetdataid = probesetxref[1]
                    probeset = probesets.get_probeset(probesetid)
                    probesetname = probeset[1]
                    datarows = probesets.get_probesetdata(probesetdataid)
                    # Columns 1 and 2 of every data row are used as strain
                    # name and value.  They are extracted row by row rather
                    # than through zip(*rows)[i], which raised IndexError
                    # for a probeset without data and is not subscriptable
                    # on Python 3.
                    # NOTE(review): assumes utilities.to_dic accepts any two
                    # equal-length sequences of keys and values -- confirm.
                    valuebystrain = utilities.to_dic(
                        [row[1].lower() for row in datarows],
                        [row[2] for row in datarows])
                    #
                    fields = [probesetid, probesetname]
                    for strainname in strainnames:
                        if strainname in valuebystrain:
                            fields.append(valuebystrain[strainname])
                        else:
                            fields.append('x')
                    # Every field, including the last, is followed by a tab;
                    # this keeps the row format of the original output.
                    outputfile.write(
                        "".join("%s\t" % field for field in fields) + "\n")
                    # Flushed per row so progress can be followed on disk.
                    outputfile.flush()
Ejemplo n.º 4
0
def bxd_correlations_givenprobesetfreezes(
        probesetfreezesfile, outputdir,
        genofile="/home/leiyan/gn/web/genotypes/BXD.geno"):
    """Run ``correlations`` for every probeset freeze listed in a file.

    Args:
        probesetfreezesfile: Path to a text file.  The first
            whitespace-separated token of every non-blank line is used as
            a probeset-freeze id.
        outputdir: Passed through unchanged to ``correlations``.
        genofile: Path of the geno file handed to ``genotypes.load_genos``.
            Defaults to the location that used to be hard-coded here, so
            existing two-argument calls behave as before.

    The geno file is loaded once; the number of strains and genos it
    yielded is printed to stdout before the freezes are processed.
    """
    loaded = genotypes.load_genos(genofile)
    genostrains = loaded[0]
    genos = loaded[1]
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print("From geno file, get %d strains" % (len(genostrains)))
    print("From geno file, get %d genos" % (len(genos)))
    #
    # ``with`` closes the input file even when ``correlations`` raises.
    with open(probesetfreezesfile, 'r') as infile:
        for line in infile:
            cells = line.split()
            if not cells:
                # Blank line (e.g. a trailing newline): nothing to look up.
                continue
            probesetfreeze = datastructure.get_probesetfreeze(cells[0])
            correlations(
                outputdir=outputdir, genos=genos,
                probesetfreeze=probesetfreeze)
Ejemplo n.º 5
0
def generate_probesets(probesetfreezesfile, outputdir):
    """Write a per-probeset data-row count table for each listed freeze.

    Args:
        probesetfreezesfile: Path to a text file.  The first
            whitespace-separated token of every non-blank line is used as
            a probeset-freeze id.
        outputdir: Directory in which the output files are created.

    For every freeze a file ``<outputdir>/<id>_<name>.txt`` is written
    with the columns ProbeSetId, ProbeSetName, Symbol and StrainNumbers,
    where StrainNumbers is the number of rows returned by
    ``probesets.get_probesetdata`` for that probeset.  The freeze record
    and the number of cross-references are printed to stdout as progress
    output.
    """
    # ``with`` closes both files even when a database lookup raises.
    with open(probesetfreezesfile, 'r') as infile:
        for line in infile:
            cells = line.split()
            if not cells:
                # Blank line (e.g. a trailing newline): nothing to look up.
                continue
            probesetfreeze = datastructure.get_probesetfreeze(cells[0])
            probesetfreezeid = probesetfreeze[0]
            probesetfreezename = probesetfreeze[1]
            #
            outputpath = "%s/%d_%s.txt" % (
                outputdir, probesetfreezeid, probesetfreezename)
            with open(outputpath, "w+") as outputfile:
                # The header keeps its trailing tab so the file format is
                # unchanged for existing consumers.
                outputfile.write(
                    "ProbeSetId\tProbeSetName\tSymbol\tStrainNumbers\t\n")
                outputfile.flush()
                #
                probesetxrefs = probesets.get_probesetxref(probesetfreezeid)
                print(probesetfreeze)
                print(len(probesetxrefs))
                for probesetxref in probesetxrefs:
                    probesetid = probesetxref[0]
                    probesetdataid = probesetxref[1]
                    probeset = probesets.get_probeset(probesetid)
                    probesetname = probeset[1]
                    probesetsymbol = probeset[2]
                    probesetdata = probesets.get_probesetdata(probesetdataid)
                    #
                    outputfile.write("%s\t%s\t%s\t%d\n" % (
                        probesetid,
                        probesetname,
                        probesetsymbol,
                        len(probesetdata),
                    ))
                    # Flushed per row so progress can be followed on disk.
                    outputfile.flush()
Ejemplo n.º 6
0
def generate_probesets_2(probesetfreezesfile, outputdir):
    """Write a probeset annotation/mapping table for each listed freeze.

    Args:
        probesetfreezesfile: Path to a text file.  The first
            whitespace-separated token of every non-blank line is used as
            a probeset-freeze id.
        outputdir: Directory in which the output files are created.

    For every freeze a file ``<outputdir>/<id>_<name>.txt`` is written
    with one row per probeset cross-reference.  Each row combines fields
    of the probeset record, the chr/Mb fields of the geno record named by
    the cross-reference's locus (left empty when there is no locus) and
    the mean, SE, LRS and p-value fields of the cross-reference.  Every
    cell, including the last one of a row, is followed by a tab.  A
    progress line per freeze is printed to stdout.
    """
    columns = (
        "ProbeSetId", "Symbol", "Description", "Chr", "MB",
        "Marker_Chr", "Marker_MB", "Mean_Expression", "SE", "LRS", "pValue",
    )
    # ``with`` closes both files even when a database lookup raises.
    with open(probesetfreezesfile, 'r') as infile:
        for line in infile:
            cells = line.split()
            if not cells:
                # Blank line (e.g. a trailing newline): nothing to look up.
                continue
            probesetfreeze = datastructure.get_probesetfreeze(cells[0])
            probesetfreezeid = probesetfreeze[0]
            probesetfreezename = probesetfreeze[1]
            inbredset = datastructure.get_inbredset(probesetfreezeid)
            inbredsetid = inbredset[0]
            #
            outputpath = "%s/%d_%s.txt" % (
                outputdir, probesetfreezeid, probesetfreezename)
            with open(outputpath, "w+") as outputfile:
                outputfile.write(
                    "".join("%s\t" % column for column in columns) + "\n")
                outputfile.flush()
                #
                probesetxrefs = probesets.get_probesetxref(probesetfreezeid)
                print("%s:\n\t%d probesetxrefs" % (
                    probesetfreeze, len(probesetxrefs)))
                for probesetxref in probesetxrefs:
                    probesetid = probesetxref[0]
                    locus = probesetxref[2]
                    lrs = probesetxref[3]
                    pvalue = probesetxref[4]
                    mean = probesetxref[5]
                    se = probesetxref[6]
                    #
                    probeset = probesets.get_probeset(probesetid)
                    probesetname = probeset[1]
                    probesetsymbol = probeset[2]
                    probesetdescription = probeset[3]
                    probesetchr = probeset[5]
                    probesetmb = probeset[6]
                    #
                    if not locus:
                        # No locus (None or empty): leave marker cells blank.
                        genochr = ""
                        genomb = ""
                    else:
                        geno = genotypes.get_geno(
                            inbredsetid=inbredsetid, name=locus)
                        genochr = geno[2]
                        genomb = geno[3]
                    #
                    # NOTE(review): the first header cell is "ProbeSetId"
                    # but the value written is probeset field 1 (the name),
                    # not the id from the cross-reference -- confirm this
                    # is intended.
                    fields = (
                        probesetname, probesetsymbol, probesetdescription,
                        probesetchr, probesetmb, genochr, genomb,
                        mean, se, lrs, pvalue,
                    )
                    outputfile.write(
                        "".join("%s\t" % field for field in fields) + "\n")
                    # Flushed per row so progress can be followed on disk.
                    outputfile.flush()