Ejemplo n.º 1
0
        # Commented-out alternative that would have read a fixed background
        # peak file instead of the per-iteration random BED files used below.
        #backBed=GenomicRegionSet("BACK")
        #backBed.read_bed(backGroundPeaks)
        # Build the background distribution: for each of the `randomize`
        # iterations, run the same gene-association filter on the file
        # "<j>random.bed" and keep the second returned value.
        # `n` always equals `j` here and is not used inside the loop.
        for j, n in enumerate(range(randomize)):
            backUP = GenomicRegionSet("BACKUP")
            # filter_by_gene_association returns five values; only
            # back_de_peak_genes is used afterwards.
            # NOTE(review): `bla` is presumably a throwaway value -- confirm
            # against filter_by_gene_association.
            [
                back_de_genes, back_de_peak_genes, back_mappedGenes,
                back_totalPeaks, bla
            ] = backUP.filter_by_gene_association(str(j) + "random.bed",
                                                  g.genes,
                                                  geneFile,
                                                  genomeFile,
                                                  threshDist=distance)
            randomRes.append(back_de_peak_genes)
            #print str(j)+"random.bed"
        randomRes = numpy.array(randomRes)
        #print randomRes
        # z-score of the observed value (de_peak_genes) against the mean and
        # standard deviation of the random-background values.
        a = de_peak_genes
        m = numpy.mean(randomRes)
        # numpy.std defaults to ddof=0 (population standard deviation).
        s = numpy.std(randomRes)
        # NOTE(review): if every background value is identical, s is 0 and z
        # becomes inf/nan -- confirm whether that case needs handling.
        z = (a - m) / s
        # float() forces true division under Python 2 (this file uses the
        # Python 2 print statement).
        # NOTE(review): degenes == 0 would raise ZeroDivisionError on the next
        # line if de_peak_genes is a plain int -- verify against the caller.
        prop_de = de_peak_genes / float(degenes)
        prop_back = m / float(degenes)
        # Survival function (1 - CDF) of the standard normal at z, i.e. the
        # upper-tail probability of the z-score.
        p = scipy.stats.norm.sf(z)
        # One whitespace-separated report line per (region, gene set) pair.
        # `degenes` is printed twice (6th and last field); prop_de / prop_back
        # is the ratio of observed to background proportion.
        print region.name, g.name, a, m, z, degenes, mappedGenes, len(
            allgenes), prop_de, prop_back, prop_de / prop_back, p, degenes

        # When an output directory is given, append a tab-separated line
        # (region name, gene-set name, then bed.genes) to outGene and write
        # `bed` to "<outdir>/<g.name>_<region.name>.bed".
        if len(outdir) > 0:
            outGene.write(region.name + "\t" + g.name + "\t" +
                          ("\t".join(bed.genes)) + "\n")
            bed.write_bed(outdir + "/" + g.name + "_" + region.name + ".bed")
Ejemplo n.º 2
0
# Organism name; passed to GenomeData and to gene_association below.
parser.add_argument('-organism', type=str, help="Define the organism")
args = parser.parse_args()




# NOTE(review): `genome` is not referenced again in the visible part of this
# script -- confirm whether it is used further down.
genome = GenomeData(args.organism)

# Case 1: args.bed is a single file. Read it, run gene association and write
# the result to args.output.
if os.path.isfile(args.bed):
    regionset = GenomicRegionSet("bed")
    regionset.read_bed(args.bed)
    # The result of gene_association is handed to replace_region_name with
    # combine=True; presumably this renames each region after its associated
    # gene(s) -- confirm against GenomicRegionSet.
    gr = regionset.gene_association(organism=args.organism, promoterLength=1000, 
                                    threshDist=500000, show_dis=True)
    regionset.replace_region_name(gr,combine=True)
    
    regionset.write_bed(args.output)

# Case 2: args.bed is a directory. args.output is treated as a directory and
# created if missing, then every matching file found by os.walk is processed.
elif os.path.isdir(args.bed):
    if not os.path.exists(args.output):
        os.makedirs(args.output)
    for root, dirnames, filenames in os.walk(args.bed):
            
        for filename in filenames:
            # Substring test: matches any name containing ".bed", not only
            # names that end with it.
            if ".bed" in filename:
                print(filename)
                fnn = os.path.basename(filename)
                # Part of the name before the first ".bed".
                # NOTE(review): `fn` is not used in the visible lines.
                fn = fnn.partition(".bed")[0]
                try:
                    regionset = GenomicRegionSet("bed")
                    # NOTE(review): the path is built from args.bed, not from
                    # `root`, so a file that os.walk finds in a subdirectory
                    # is looked up in the top-level directory instead.
                    regionset.read_bed(os.path.join(args.bed,fnn))
                    gr = regionset.gene_association(organism=args.organism, promoterLength=1000, 
Ejemplo n.º 3
0
              # Draw a random region set with as many regions as `region`
              # (total_size=len(region)); the genome is hard-coded to 'hg19'.
              # NOTE(review): the deeper indentation suggests this line sits
              # in a nested branch whose header is outside the visible
              # excerpt -- confirm.
              br=region.random_regions('hg19',total_size=len(region),overlap_result=True, overlap_input=True)

            # Persist the random set as "<j>random.bed"; the loop over
            # genesets below reads these files back by the same name.
            br.write_bed(str(j)+"random.bed")
    # For every gene set, compare the observed gene-association result for
    # `region` against the pre-generated random BED files ("<j>random.bed").
    for g in genesets:
        #print region,g
        bed = GenomicRegionSet("")
        # Observed values: filter the region's own BED file by association
        # with the genes of this gene set.
        # NOTE(review): the meaning of the five returned values is inferred
        # from their names only -- confirm against filter_by_gene_association.
        [degenes,de_peak_genes, mappedGenes, totalPeaks,bla] = bed.filter_by_gene_association(region.fileName,g.genes,geneFile,genomeFile,threshDist=distance)
        # Background values are collected afresh for each gene set.
        randomRes=[]
        # Commented-out alternative that would have read a fixed background
        # peak file.
        #backBed=GenomicRegionSet("BACK")    
        #backBed.read_bed(backGroundPeaks)
        # `n` always equals `j` here and is not used inside the loop.
        for j,n in enumerate(range(randomize)):
            backUP=GenomicRegionSet("BACKUP")
            # Same filter on the j-th random file; only back_de_peak_genes is
            # kept. Note that `bla` from the observed call is overwritten.
            [back_de_genes,back_de_peak_genes, back_mappedGenes, back_totalPeaks,bla] = backUP.filter_by_gene_association(str(j)+"random.bed",g.genes,geneFile,genomeFile,threshDist=distance)
            randomRes.append(back_de_peak_genes)
            #print str(j)+"random.bed"
        randomRes=numpy.array(randomRes)
        #print randomRes
        # z-score of the observed value against the mean and standard
        # deviation of the background values.
        a=de_peak_genes
        m=numpy.mean(randomRes)
        # numpy.std defaults to ddof=0 (population standard deviation).
        s=numpy.std(randomRes)
        # NOTE(review): s is 0 when all background values are identical, so z
        # becomes inf/nan -- confirm whether that case needs handling.
        z=(a-m)/s
        # float() forces true division under Python 2.
        # NOTE(review): degenes == 0 would raise ZeroDivisionError here if
        # de_peak_genes is a plain int -- verify.
        prop_de=de_peak_genes/float(degenes)
        prop_back=m/float(degenes)
        # Survival function (1 - CDF) of the standard normal at z, i.e. the
        # upper-tail probability of the z-score.
        p= scipy.stats.norm.sf(z)
        # One report line per (region, gene set) pair; `degenes` appears twice
        # (6th and last field).
        print region.name,g.name,a,m,z,degenes,mappedGenes,len(allgenes),prop_de,prop_back,prop_de/prop_back,p,degenes

        # When an output directory is given, append a tab-separated line
        # (region name, gene-set name, then bed.genes) to outGene and write
        # the filtered set to "<outdir>/<g.name>_<region.name>.bed".
        if len(outdir)>0:
          outGene.write(region.name+"\t"+g.name+"\t"+("\t".join(bed.genes))+"\n")  
          bed.write_bed(outdir+"/"+g.name+"_"+region.name+".bed")