# Empirical enrichment test for the current (region, g) pair.
#
# For every randomization round j, the gene association is recomputed on the
# background file "<j>random.bed" and its second return value is collected.
# The observed count (de_peak_genes) is then compared with that background
# sample through a z-score and an upper-tail normal p-value.
#
# Names read from the surrounding script: randomize, randomRes (a list that
# is appended to), g, region, bed, geneFile, genomeFile, distance,
# de_peak_genes, degenes, mappedGenes, allgenes, outdir, outGene.

# Disabled in the original source, kept for reference:
# backBed=GenomicRegionSet("BACK")
# backBed.read_bed(backGroundPeaks)
for j in range(randomize):
    backUP = GenomicRegionSet("BACKUP")
    # Only back_de_peak_genes is used afterwards; the other names are still
    # bound as before (note that `bla` is overwritten on every round).
    [
        back_de_genes, back_de_peak_genes, back_mappedGenes, back_totalPeaks,
        bla
    ] = backUP.filter_by_gene_association(
        str(j) + "random.bed", g.genes, geneFile, genomeFile,
        threshDist=distance)
    randomRes.append(back_de_peak_genes)
randomRes = numpy.array(randomRes)

a = de_peak_genes
m = numpy.mean(randomRes)
s = numpy.std(randomRes)
# NOTE(review): s is 0 when every background round yields the same count; the
# division then gives inf/nan instead of a usable z-score -- confirm whether
# such rows should be reported.
z = (a - m) / s
# NOTE(review): with plain Python numbers a degenes of 0 raises
# ZeroDivisionError here -- confirm that callers never pass an empty set.
prop_de = de_peak_genes / float(degenes)
prop_back = m / float(degenes)
p = scipy.stats.norm.sf(z)

# One space-separated report row. Joining the fields into a single string
# keeps the output text of the former Python 2 print statement while also
# being valid on Python 3.
# NOTE(review): degenes appears twice (6th and last field), as in the
# original output.
report = (region.name, g.name, a, m, z, degenes, mappedGenes, len(allgenes),
          prop_de, prop_back, prop_de / prop_back, p, degenes)
print(" ".join(str(field) for field in report))

if outdir:
    # Append the associated genes to the shared gene table and dump the
    # filtered regions next to it.
    outGene.write(region.name + "\t" + g.name + "\t" +
                  ("\t".join(bed.genes)) + "\n")
    bed.write_bed(outdir + "/" + g.name + "_" + region.name + ".bed")
# Command-line fragment: annotate BED regions with their associated genes.
# (The statements below were collapsed onto one line; this fragment also ends
# inside an open gene_association(...) call and an open try: block.)
#
# * Registers the -organism option, parses the arguments and builds a
#   GenomeData for that organism (`genome` is not used in the visible part).
# * If args.bed is a file: read it, call gene_association with
#   promoterLength=1000, threshDist=500000, show_dis=True, pass the result to
#   replace_region_name(combine=True) and write the set to args.output.
# * If args.bed is a directory: create args.output when missing, walk the
#   tree and process every file whose name contains ".bed".
#
# NOTE(review): the ".bed" test is a substring match, so names such as
# "x.bed.gz" or "x.bedgraph" are picked up as well.
# NOTE(review): files are opened via os.path.join(args.bed, fnn) rather than
# os.path.join(root, fnn); for files that os.walk finds in sub-directories
# this points at the top-level directory -- confirm this is intended.
parser.add_argument('-organism', type=str, help="Define the organism") args = parser.parse_args() genome = GenomeData(args.organism) if os.path.isfile(args.bed): regionset = GenomicRegionSet("bed") regionset.read_bed(args.bed) gr = regionset.gene_association(organism=args.organism, promoterLength=1000, threshDist=500000, show_dis=True) regionset.replace_region_name(gr,combine=True) regionset.write_bed(args.output) elif os.path.isdir(args.bed): if not os.path.exists(args.output): os.makedirs(args.output) for root, dirnames, filenames in os.walk(args.bed): for filename in filenames: if ".bed" in filename: print(filename) fnn = os.path.basename(filename) fn = fnn.partition(".bed")[0] try: regionset = GenomicRegionSet("bed") regionset.read_bed(os.path.join(args.bed,fnn)) gr = regionset.gene_association(organism=args.organism, promoterLength=1000,
# Script fragment: random background generation followed by a per-gene-set
# enrichment test. (The statements below were collapsed onto one line; the
# loop that binds `j` for the first two statements starts outside this
# fragment.)
#
# 1. Draw random regions from `region` with total_size=len(region),
#    overlap_result=True, overlap_input=True and write them to
#    "<j>random.bed".
# 2. For every g in genesets: run filter_by_gene_association on
#    region.fileName, then repeat it on each of the `randomize` background
#    files and collect back_de_peak_genes into randomRes.
# 3. Compare the observed de_peak_genes with the background sample: mean,
#    std, z = (a - m) / s, p = scipy.stats.norm.sf(z); print one
#    space-separated row per (region, g).
# 4. When outdir is non-empty, append the gene names to outGene and write the
#    filtered regions to "<outdir>/<g.name>_<region.name>.bed".
#
# NOTE(review): the genome is hard-coded as 'hg19' in random_regions.
# NOTE(review): s is 0 when all background counts are equal, and degenes is
# used as a divisor without a zero check.
# NOTE(review): `print` is used as a statement (Python 2 syntax) and degenes
# is printed twice.
br=region.random_regions('hg19',total_size=len(region),overlap_result=True, overlap_input=True) br.write_bed(str(j)+"random.bed") for g in genesets: #print region,g bed = GenomicRegionSet("") [degenes,de_peak_genes, mappedGenes, totalPeaks,bla] = bed.filter_by_gene_association(region.fileName,g.genes,geneFile,genomeFile,threshDist=distance) randomRes=[] #backBed=GenomicRegionSet("BACK") #backBed.read_bed(backGroundPeaks) for j,n in enumerate(range(randomize)): backUP=GenomicRegionSet("BACKUP") [back_de_genes,back_de_peak_genes, back_mappedGenes, back_totalPeaks,bla] = backUP.filter_by_gene_association(str(j)+"random.bed",g.genes,geneFile,genomeFile,threshDist=distance) randomRes.append(back_de_peak_genes) #print str(j)+"random.bed" randomRes=numpy.array(randomRes) #print randomRes a=de_peak_genes m=numpy.mean(randomRes) s=numpy.std(randomRes) z=(a-m)/s prop_de=de_peak_genes/float(degenes) prop_back=m/float(degenes) p= scipy.stats.norm.sf(z) print region.name,g.name,a,m,z,degenes,mappedGenes,len(allgenes),prop_de,prop_back,prop_de/prop_back,p,degenes if len(outdir)>0: outGene.write(region.name+"\t"+g.name+"\t"+("\t".join(bed.genes))+"\n") bed.write_bed(outdir+"/"+g.name+"_"+region.name+".bed")