def makeWig(bamfile):
    """Build a per-position coverage table for a BAM file.

    Coverage is obtained from ``CLIP_analysis.get_bam_coverage`` using
    ``bamfile.filename``. Each (interval, count) step it reports is expanded
    position by position.

    Args:
        bamfile: object exposing a ``filename`` attribute.

    Returns:
        A ``defaultdict(dict)`` mapping chromosome -> {position: count}.
        Only steps whose count is greater than zero contribute entries.
    """
    coverage = CLIP_analysis.get_bam_coverage(bamfile.filename)
    wig = defaultdict(dict)
    for interval, depth in coverage.steps():
        # Skip steps without positive coverage.
        if not depth > 0:
            continue
        for genomic_pos in interval.xrange_d():
            wig[interval.chrom][genomic_pos.pos] = depth
    return wig
def make_features(self):
    """Load the assigned region BED files into a stranded feature array.

    For every name in ``self.assigned_regions`` the file
    ``<clipper.data_dir()>/regions/<species>_<region>.bed`` is opened as a
    ``pybedtools.BedTool``. Each BED interval is then paired with the
    corresponding interval from ``CLIP_analysis.bed_to_genomic_interval``
    and stored in an ``HTSeq.GenomicArrayOfSets`` as a one-element set
    holding ``Region(region, gene_id)``, where ``gene_id`` is the BED
    interval's ``name``.

    Returns:
        The populated ``HTSeq.GenomicArrayOfSets`` ("auto" chromosomes,
        stranded).
    """
    Region = collections.namedtuple("Region", ["region", "gene_id"])

    # Open every BED track first; the feature array is filled afterwards.
    bedtracks = {
        name: pybedtools.BedTool(
            os.path.join(
                clipper.data_dir(),
                "regions",
                "%s_%s.bed" % (self.species, name),
            )
        )
        for name in self.assigned_regions
    }

    features = HTSeq.GenomicArrayOfSets("auto", stranded=True)
    for name, track in bedtracks.items():
        genomic_intervals = CLIP_analysis.bed_to_genomic_interval(track)
        for iv, bed_entry in izip(genomic_intervals, track):
            # NOTE(review): this assigns a fresh set rather than adding to
            # an existing one, so overlapping intervals presumably replace
            # earlier entries -- confirm this is intended (vs. ``+=``).
            tag = Region(name, bed_entry.name)
            features[iv] = set([tag])
    return features
def make_features(self):
    """Load the assigned region BED files into a stranded feature array.

    For every name in ``self.assigned_regions`` the file
    ``<clipper.data_dir()>/regions/<species>_<region>.bed`` is opened as a
    ``pybedtools.BedTool``. Each BED interval is then paired with the
    corresponding interval from ``CLIP_analysis.bed_to_genomic_interval``
    and stored in an ``HTSeq.GenomicArrayOfSets`` as a one-element set
    holding ``Region(region, gene_id)``, where ``gene_id`` is the BED
    interval's ``name``.

    Returns:
        The populated ``HTSeq.GenomicArrayOfSets`` ("auto" chromosomes,
        stranded).
    """
    Region = collections.namedtuple("Region", ["region", "gene_id"])

    # Open every BED track first; the feature array is filled afterwards.
    bedtracks = {
        name: pybedtools.BedTool(
            os.path.join(
                clipper.data_dir(),
                "regions",
                "%s_%s.bed" % (self.species, name),
            )
        )
        for name in self.assigned_regions
    }

    features = HTSeq.GenomicArrayOfSets("auto", stranded=True)
    for name, track in bedtracks.items():
        genomic_intervals = CLIP_analysis.bed_to_genomic_interval(track)
        for iv, bed_entry in izip(genomic_intervals, track):
            # NOTE(review): this assigns a fresh set rather than adding to
            # an existing one, so overlapping intervals presumably replace
            # earlier entries -- confirm this is intended (vs. ``+=``).
            tag = Region(name, bed_entry.name)
            features[iv] = set([tag])
    return features
def get_uORF_start_stop_gff(
        self,
        db_path="/nas3/yeolab/Genome/ensembl/gtf/gencode.v17.annotation.gtf.db.old",
        genome_fasta="/nas3/yeolab/Genome/ucsc/hg19/chromosomes/all.fa",
        species="hg19"):
    """Return uORFs found in annotated 5' UTRs (hg19 by default).

    Args:
        db_path: path to the gffutils annotation database. Defaults to the
            GENCODE v17 database this method previously hard-coded.
        genome_fasta: path to the genome FASTA handed to
            ``self._get_five_prime_utr_sequences``. Defaults to the hg19
            FASTA this method previously hard-coded.
        species: genome build name passed to
            ``CLIP_analysis.get_genomic_regions``. Defaults to ``"hg19"``.

    Returns:
        Whatever ``self._get_uorf_start_stop`` returns for the 5' UTR
        sequences (presumably uORF start/stop positions -- see that helper).
    """
    db = gffutils.FeatureDB(db_path)

    # NOTE(review): the transcript map is not used below. The call is kept
    # in case ``_create_transcript_map`` has side effects; drop it once that
    # is confirmed not to be the case.
    self._create_transcript_map(db)

    # Get all genomic regions; only the 5' UTRs are used afterwards.
    regions_dir = os.path.join(clipper.data_dir(), "regions")
    regions = CLIP_analysis.get_genomic_regions(regions_dir, species, db)

    # NOTE(review): positional unpacking of ``.values()`` depends on the
    # ordering of the mapping returned by ``get_genomic_regions``
    # (presumably an ordered mapping) -- confirm, or look up by key instead.
    (_utr3, utr5, _exons, _genes, _introns, _cds) = regions.values()

    five_prime_utr_dict = self._get_five_prime_utr_sequences(
        utr5, genome_fasta)
    return self._get_uorf_start_stop(five_prime_utr_dict)
def get_uORF_start_stop_gff(
        self,
        db_path="/nas3/yeolab/Genome/ensembl/gtf/gencode.v17.annotation.gtf.db.old",
        genome_fasta="/nas3/yeolab/Genome/ucsc/hg19/chromosomes/all.fa",
        species="hg19"):
    """Return uORFs found in annotated 5' UTRs (hg19 by default).

    Args:
        db_path: path to the gffutils annotation database. Defaults to the
            GENCODE v17 database this method previously hard-coded.
        genome_fasta: path to the genome FASTA handed to
            ``self._get_five_prime_utr_sequences``. Defaults to the hg19
            FASTA this method previously hard-coded.
        species: genome build name passed to
            ``CLIP_analysis.get_genomic_regions``. Defaults to ``"hg19"``.

    Returns:
        Whatever ``self._get_uorf_start_stop`` returns for the 5' UTR
        sequences (presumably uORF start/stop positions -- see that helper).
    """
    db = gffutils.FeatureDB(db_path)

    # NOTE(review): the transcript map is not used below. The call is kept
    # in case ``_create_transcript_map`` has side effects; drop it once that
    # is confirmed not to be the case.
    self._create_transcript_map(db)

    # Get all genomic regions; only the 5' UTRs are used afterwards.
    regions_dir = os.path.join(clipper.data_dir(), "regions")
    regions = CLIP_analysis.get_genomic_regions(regions_dir, species, db)

    # NOTE(review): positional unpacking of ``.values()`` depends on the
    # ordering of the mapping returned by ``get_genomic_regions``
    # (presumably an ordered mapping) -- confirm, or look up by key instead.
    (_utr3, utr5, _exons, _genes, _introns, _cds) = regions.values()

    five_prime_utr_dict = self._get_five_prime_utr_sequences(
        utr5, genome_fasta)
    return self._get_uorf_start_stop(five_prime_utr_dict)