Example #1
0
def slop_list2gff():
    """
    Yield `asinterval(...)` of the database feature for every ID in `slop_list`.

    IDs that start with "MGG" are looked up in `MGG_db` using their first
    nine characters. Every other ID is parsed with
    `strain_protein_id_pattern` into a strain ID and a protein ordinal; the
    feature is then read from the module-level object named
    "<strain_id>_db" under the key "gene_<protein_ordinal>".
    """
    module_globals = globals()
    for protein_id in slop_list:
        if protein_id.startswith("MGG"):
            yield asinterval(MGG_db[protein_id[0:9]])
            continue
        # Groups 1 and 2 of the pattern hold the strain ID and the ordinal.
        strain_id, protein_ordinal = strain_protein_id_pattern.search(
            protein_id).group(1, 2)
        # Per-strain databases are found by name among the module globals.
        strain_db = module_globals.get(strain_id + "_db")
        yield asinterval(strain_db["gene_" + protein_ordinal])
Example #2
0
def intron_generator():
    """
    Yield the pieces of every gene that remain after removing all exons.

    Only features whose chromosome is listed in `settings.CHROMS` are
    considered. The subtraction result is passed through `saveas()` before
    it is iterated.
    """
    def _on_selected_chroms(features):
        # Lazily convert features, dropping those on unwanted chromosomes.
        for feat in features:
            if feat.chrom in settings.CHROMS:
                yield asinterval(feat)

    gene_intervals = pybedtools.BedTool(
        _on_selected_chroms(settings.G.features_of_type('gene')))
    exon_intervals = pybedtools.BedTool(
        _on_selected_chroms(settings.G.features_of_type('exon')))

    introns = gene_intervals.subtract(exon_intervals).saveas()
    for intron in introns:
        yield intron
Example #3
0
def intron_generator():
    """
    Yield the pieces of every gene that remain after removing all exons.

    Only features whose chromosome is listed in `settings.CHROMS` are
    considered. The subtraction result is passed through `saveas()` before
    it is iterated.
    """
    def _on_selected_chroms(features):
        # Lazily convert features, dropping those on unwanted chromosomes.
        for feat in features:
            if feat.chrom in settings.CHROMS:
                yield asinterval(feat)

    gene_intervals = pybedtools.BedTool(
        _on_selected_chroms(settings.G.features_of_type('gene')))
    exon_intervals = pybedtools.BedTool(
        _on_selected_chroms(settings.G.features_of_type('exon')))

    introns = gene_intervals.subtract(exon_intervals).saveas()
    for intron in introns:
        yield intron
Example #4
0
    def features(self, ignore_unknown=False):
        """
        Generator of currently-selected features.

        Looks up each ID from `self.data[self.id_column]` in the attached
        `gffutils.FeatureDB` (`self.gffdb`) and converts the result with
        `asinterval` for use with `pybedtools`.

        :param ignore_unknown: If `ignore_unknown=False` then an exception will
            be raised if a feature cannot be found; if `ignore_unknown=True`
            then silently ignore these cases. Consider using the
            `strip_unknown_features()` method to handle these cases up front.
        :raises ValueError: if no database has been attached via `.gffdb`.
        :raises gffutils.FeatureNotFoundError: if an ID is missing from the
            database and `ignore_unknown` is false.
        """
        if not self.gffdb:
            raise ValueError('Please attach a GFF database created by '
                             'gffutils by setting the .gffdb attribute to the '
                             'database\'s path.')

        for i in self.data[self.id_column]:
            # Only the lookup/conversion is guarded; the yield stays outside
            # so exceptions from the consumer are never mistaken for a miss.
            try:
                interval = asinterval(self.gffdb[i])
            except gffutils.FeatureNotFoundError:
                if ignore_unknown:
                    continue
                # `i` is the lookup key itself (normally a plain ID with no
                # `.id` attribute), so fall back to it for the message rather
                # than failing with AttributeError.
                missing_id = getattr(i, 'id', i)
                raise gffutils.FeatureNotFoundError(
                    '%s not found' % (missing_id,))
            yield interval
Example #5
0
    def features(self, ignore_unknown=False):
        """
        Generator of currently-selected features.

        Looks up each ID from `self.data[self.id_column]` in the attached
        `gffutils.FeatureDB` (`self.gffdb`) and converts the result with
        `asinterval` for use with `pybedtools`.

        :param ignore_unknown: If `ignore_unknown=False` then an exception will
            be raised if a feature cannot be found; if `ignore_unknown=True`
            then silently ignore these cases. Consider using the
            `strip_unknown_features()` method to handle these cases up front.
        :raises ValueError: if no database has been attached via `.gffdb`.
        :raises gffutils.FeatureNotFoundError: if an ID is missing from the
            database and `ignore_unknown` is false.
        """
        if not self.gffdb:
            raise ValueError('Please attach a GFF database created by '
                             'gffutils by setting the .gffdb attribute to the '
                             'database\'s path.')

        for i in self.data[self.id_column]:
            # Only the lookup/conversion is guarded; the yield stays outside
            # so exceptions from the consumer are never mistaken for a miss.
            try:
                interval = asinterval(self.gffdb[i])
            except gffutils.FeatureNotFoundError:
                if ignore_unknown:
                    continue
                # `i` is the lookup key itself (normally a plain ID with no
                # `.id` attribute), so fall back to it for the message rather
                # than failing with AttributeError.
                missing_id = getattr(i, 'id', i)
                raise gffutils.FeatureNotFoundError(
                    '%s not found' % (missing_id,))
            yield interval
Example #6
0
 def _make_track(self, d, cls):
     """
     Build a `Track` from the features stored in `d[cls]`.

     The track gets the height configured for `cls` in `self.heights` and is
     shifted up from `self.ybase` by half of the room left over within the
     'full' height. `self.kwargs` is forwarded to `Track` unchanged.
     """
     track_height = self.heights[cls]
     # Half of the unused vertical room goes below the track.
     spare_room = self.heights['full'] - track_height
     track_base = self.ybase + spare_room * 0.5
     intervals = (asinterval(item) for item in d[cls])
     return Track(intervals, ybase=track_base, yheight=track_height,
                  **self.kwargs)
Example #7
0
 def gen():
     """
     Yield one single-position TSS interval per first-level child of each gene.

     Each child feature is modified in place: its coordinates are collapsed
     onto its stop (minus strand) or its start (any other strand) and
     '_TSS' is appended to its featuretype before conversion.
     """
     for parent_gene in db.features_of_type('gene'):
         for tx in db.children(parent_gene, level=1):
             on_minus = tx.strand == '-'
             if not on_minus:
                 # The start is the TSS: pull the stop back onto it.
                 tx.stop = tx.start
             else:
                 # Minus strand: the stop is the TSS.
                 tx.start = tx.stop
             tx.featuretype += '_TSS'
             yield helpers.asinterval(tx)
 def gen():
     """
     Yield one single-position TSS interval per first-level child of each gene.

     Each child feature is modified in place: its coordinates are collapsed
     onto its stop (minus strand) or its start (any other strand) and
     '_TSS' is appended to its featuretype before conversion.
     """
     for parent_gene in db.features_of_type('gene'):
         for tx in db.children(parent_gene, level=1):
             on_minus = tx.strand == '-'
             if not on_minus:
                 # The start is the TSS: pull the stop back onto it.
                 tx.stop = tx.start
             else:
                 # Minus strand: the stop is the TSS.
                 tx.start = tx.stop
             tx.featuretype += '_TSS'
             yield helpers.asinterval(tx)
Example #9
0
def gene_generator():
    """
    Yield every 'gene' feature in `settings.G` as an interval.

    Genes whose chromosome is not listed in `settings.CHROMS` are skipped.
    Other generators could be written in the same way, e.g. one that yields
    only the unique TSS sites across all isoforms.
    """
    for gene in settings.G.features_of_type('gene'):
        if gene.chrom in settings.CHROMS:
            yield asinterval(gene)
Example #10
0
def gene_generator():
    """
    Yield every 'gene' feature in `settings.G` as an interval.

    Genes whose chromosome is not listed in `settings.CHROMS` are skipped.
    Other generators could be written in the same way, e.g. one that yields
    only the unique TSS sites across all isoforms.
    """
    for gene in settings.G.features_of_type('gene'):
        if gene.chrom in settings.CHROMS:
            yield asinterval(gene)
def test_pbt_interval_conversion():
    """
    Check that `helpers.asinterval` keeps chrom, coordinates and name intact.

    The interval start is expected to be one less than the feature start;
    the stop is expected to be unchanged. The test returns without checking
    anything when `pybedtools` cannot be imported.
    """
    try:
        import pybedtools
    except ImportError:
        # Nothing to convert to without pybedtools.
        return
    gff_line = "chr2L FlyBase exon 7529 8116 . + . Name=CG11023:1;Parent=FBtr0300689,FBtr0300690"
    gff_feature = feature.feature_from_line(
        gff_line, strict=False, keep_order=True)
    interval = helpers.asinterval(gff_feature)

    assert interval.chrom == gff_feature.chrom == gff_feature.seqid
    assert interval.start == gff_feature.start - 1
    assert interval.stop == gff_feature.stop == gff_feature.end

    interval_name = interval.name
    feature_name = gff_feature.attributes['Name'][0]
    assert interval_name == feature_name, \
        '%s, %s' % (interval_name, feature_name)
Example #12
0
def test_pbt_interval_conversion():
    """
    Check that `helpers.asinterval` keeps chrom, coordinates and name intact.

    The interval start is expected to be one less than the feature start;
    the stop is expected to be unchanged. The test returns without checking
    anything when `pybedtools` cannot be imported.
    """
    try:
        import pybedtools
    except ImportError:
        # Nothing to convert to without pybedtools.
        return
    gff_line = "chr2L FlyBase exon 7529 8116 . + . Name=CG11023:1;Parent=FBtr0300689,FBtr0300690"
    gff_feature = feature.feature_from_line(
        gff_line, strict=False, keep_order=True)
    interval = helpers.asinterval(gff_feature)

    assert interval.chrom == gff_feature.chrom == gff_feature.seqid
    assert interval.start == gff_feature.start - 1
    assert interval.stop == gff_feature.stop == gff_feature.end

    interval_name = interval.name
    feature_name = gff_feature.attributes['Name'][0]
    assert interval_name == feature_name, \
        '%s, %s' % (interval_name, feature_name)
Example #13
0
    def plot(self, feature):
        """
        Create a new figure and draw `feature` on every panel.

        :param feature: A `pybedtools.Interval` object; a `gffutils.Feature`
            is also accepted and is converted with `asinterval` first.

        The figure is created with `self.make_fig()`. Each `(ax, method)`
        pair from `self.panels()` is then called in turn as
        `method(ax, feature)`, and whatever it returns becomes the feature
        handed to the next panel. Returns the list of axes, in panel order.
        """
        current = feature
        if isinstance(current, gffutils.Feature):
            current = asinterval(current)

        self.make_fig()

        drawn_axes = []
        for panel_ax, draw in self.panels():
            # Each panel hands back the feature for the next one to use.
            current = draw(panel_ax, current)
            drawn_axes.append(panel_ax)
        return drawn_axes
Example #14
0
    def plot(self, feature):
        """
        Create a new figure and draw `feature` on every panel.

        :param feature: A `pybedtools.Interval` object; a `gffutils.Feature`
            is also accepted and is converted with `asinterval` first.

        The figure is created with `self.make_fig()`. Each `(ax, method)`
        pair from `self.panels()` is then called in turn as
        `method(ax, feature)`, and whatever it returns becomes the feature
        handed to the next panel. Returns the list of axes, in panel order.
        """
        current = feature
        if isinstance(current, gffutils.Feature):
            current = asinterval(current)

        self.make_fig()

        drawn_axes = []
        for panel_ax, draw in self.panels():
            # Each panel hands back the feature for the next one to use.
            current = draw(panel_ax, current)
            drawn_axes.append(panel_ax)
        return drawn_axes
Example #15
0
def TSS(feature, upstream=1000, downstream=1000):
    """
    Turn `feature` into a strand-aware window around its TSS.

    For a '-' strand feature the window is anchored on `feature.stop`, with
    `downstream` subtracted on the left and `upstream` added on the right;
    otherwise it is anchored on `feature.start` with the two swapped. The
    window is clamped to 0 below and to `chromsizes[feature.chrom][1]`
    above.

    `feature` is modified in place (field 2 becomes "TSS", start and stop
    become the window) and the result of `asinterval(feature)` is returned.
    """
    upper_limit = chromsizes[feature.chrom][1]

    if feature.strand == '-':
        anchor, left_pad, right_pad = feature.stop, downstream, upstream
    else:
        anchor, left_pad, right_pad = feature.start, upstream, downstream

    window_start = max(0, anchor - left_pad)
    window_stop = min(anchor + right_pad, upper_limit)

    # Relabel the feature type, then apply the new coordinates.
    feature[2] = 'TSS'
    feature.start = window_start
    feature.stop = window_stop
    return asinterval(feature)
Example #16
0
def TSS(feature, upstream=1000, downstream=1000):
    """
    Turn `feature` into a strand-aware window around its TSS.

    For a '-' strand feature the window is anchored on `feature.stop`, with
    `downstream` subtracted on the left and `upstream` added on the right;
    otherwise it is anchored on `feature.start` with the two swapped. The
    window is clamped to 0 below and to `chromsizes[feature.chrom][1]`
    above.

    `feature` is modified in place (field 2 becomes "TSS", start and stop
    become the window) and the result of `asinterval(feature)` is returned.
    """
    upper_limit = chromsizes[feature.chrom][1]

    if feature.strand == '-':
        anchor, left_pad, right_pad = feature.stop, downstream, upstream
    else:
        anchor, left_pad, right_pad = feature.start, upstream, downstream

    window_start = max(0, anchor - left_pad)
    window_stop = min(anchor + right_pad, upper_limit)

    # Relabel the feature type, then apply the new coordinates.
    feature[2] = 'TSS'
    feature.start = window_start
    feature.stop = window_stop
    return asinterval(feature)
Example #17
0
 def scored_feature_generator(d):
     """
     Yield one scored interval per row of `d`.

     For each row the feature named by `d.id` is fetched from `db` and given
     the score -10 * log10(padj), forced to 0 when the log2 fold change is
     NaN and negated when it is negative. The feature is converted with
     `gff2bed` and `extend_fields(..., 9)`, fields 6 and 7 are overwritten
     with fields 1 and 2, and padj, pval, log2foldchange, basemeana and
     basemeanb are appended as extra string fields.

     Raises `gffutils.FeatureNotFoundError` (carrying the ID) when a row's
     ID is not in `db`.
     """
     for row in range(len(d)):
         feature_id = d.id[row]
         try:
             hit = db[feature_id]
         except gffutils.FeatureNotFoundError:
             raise gffutils.FeatureNotFoundError(feature_id)

         padj = d.padj[row]
         score = -10 * np.log10(padj)
         lfc = d.log2foldchange[row]
         if np.isnan(lfc):
             score = 0
         elif lfc < 0:
             # Sign of the score follows the direction of change.
             score = score * -1
         hit.score = str(score)

         bed_like = extend_fields(gff2bed(asinterval(hit)), 9)
         fields = bed_like.fields[:]
         # presumably thickStart/thickEnd mirroring start/end -- TODO confirm
         fields[6], fields[7] = fields[1], fields[2]
         fields.extend([
             str(padj),
             str(d.pval[row]),
             '%.3f' % lfc,
             '%.3f' % d.basemeana[row],
             '%.3f' % d.basemeanb[row],
         ])
         yield pybedtools.create_interval_from_list(fields)
Example #18
0
 def scored_feature_generator(d):
     """
     Yield one scored interval per row of `d`.

     For each row the feature named by `d.id` is fetched from `db` and given
     the score -10 * log10(padj), forced to 0 when the log2 fold change is
     NaN and negated when it is negative. The feature is converted with
     `gff2bed` and `extend_fields(..., 9)`, fields 6 and 7 are overwritten
     with fields 1 and 2, and padj, pval, log2foldchange, basemeana and
     basemeanb are appended as extra string fields.

     Raises `gffutils.FeatureNotFoundError` (carrying the ID) when a row's
     ID is not in `db`.
     """
     for row in range(len(d)):
         feature_id = d.id[row]
         try:
             hit = db[feature_id]
         except gffutils.FeatureNotFoundError:
             raise gffutils.FeatureNotFoundError(feature_id)

         padj = d.padj[row]
         score = -10 * np.log10(padj)
         lfc = d.log2foldchange[row]
         if np.isnan(lfc):
             score = 0
         elif lfc < 0:
             # Sign of the score follows the direction of change.
             score = score * -1
         hit.score = str(score)

         bed_like = extend_fields(gff2bed(asinterval(hit)), 9)
         fields = bed_like.fields[:]
         # presumably thickStart/thickEnd mirroring start/end -- TODO confirm
         fields[6], fields[7] = fields[1], fields[2]
         fields.extend([
             str(padj),
             str(d.pval[row]),
             '%.3f' % lfc,
             '%.3f' % d.basemeana[row],
             '%.3f' % d.basemeanb[row],
         ])
         yield pybedtools.create_interval_from_list(fields)
Example #19
0
def tss_generator():
    """
    Yield `TSS(...)` of every "transcript" feature in `db`, using 1000 for
    both `upstream` and `downstream`.
    """
    transcripts = db.features_of_type("transcript")
    for tx in transcripts:
        tx_interval = asinterval(tx)
        yield TSS(tx_interval, upstream=1000, downstream=1000)
Example #20
0
 def generator():
     """
     Yield, for each label in `df.index`, the matching `db` entry converted
     with `asinterval`.
     """
     for label in df.index:
         matched = db[label]
         yield asinterval(matched)
Example #21
0
            try:  # if this transcript has an entry for 'transcript_support_level'
                #   and if the level is below the acceptable threshold:
                #   keep that transcript
                if int([
                        i[1] for i in t.attributes.items()
                        if i[0] == 'transcript_support_level'
                ][0][0]) <= max_TSL:
                    temp_txpts.append(t)
            except:
                pass

        txpts = temp_txpts
        if (len(txpts) > 0
            ):  # if there are any transcripts with a sufficiently low TSL:
            all_exons = (pybedtools.BedTool([
                helpers.asinterval(i)
                for i in db.children(gene, featuretype='exon')
            ]))
            all_exons = all_exons.sort().merge(
            )  # define the ends of the genic region by using the first
            #   and last exon in the annotation as the limits
            gene_extent = pybedtools.BedTool([
                pybedtools.cbedtools.Interval(chrom=chrom,
                                              start=min(i.start
                                                        for i in all_exons),
                                              end=max(i.end
                                                      for i in all_exons))
            ])
            t_introns = []
            for t in txpts:  # for each transcript, get all of the exons
                t_exons = (pybedtools.BedTool([
Example #22
0
def generate_interval(category_fl):
    """
    Yield a `gff2bed` record for every "MGG" ID found in `category_fl`.

    Each item of `category_fl` has newline characters stripped from both
    ends; items that do not start with "MGG" are skipped. The rest are
    looked up in `MGG_db` and converted with `name_field=2`.
    """
    for raw_line in category_fl:
        strain_id = raw_line.strip('\n')
        if strain_id[0:3] == "MGG":
            mgg_interval = asinterval(MGG_db[strain_id])
            yield gff2bed(mgg_interval, name_field=2)
Example #23
0
 def generator():
     """
     Yield up to 5000 'gene' features from the database at `dbfn`, each
     converted with `asinterval`.

     Iteration simply ends early when the database holds fewer than 5000
     genes.
     """
     from itertools import islice

     G = gffutils.FeatureDB(dbfn)
     genes = G.features_of_type('gene')
     # islice replaces the Python-2-only `genes.next()` call and ends the
     # generator cleanly on exhaustion instead of leaking StopIteration
     # (a RuntimeError inside generators on Python 3.7+).
     for gene in islice(genes, 5000):
         yield asinterval(gene)
Example #24
0
 def _make_track(self, d, cls):
     """
     Build a `Track` from the features stored in `d[cls]`.

     The track gets the height configured for `cls` in `self.heights` and is
     shifted up from `self.ybase` by half of the room left over within the
     'full' height. `self.kwargs` is forwarded to `Track` unchanged.
     """
     track_height = self.heights[cls]
     # Half of the unused vertical room goes below the track.
     spare_room = self.heights['full'] - track_height
     track_base = self.ybase + spare_room * 0.5
     intervals = (asinterval(item) for item in d[cls])
     return Track(intervals, ybase=track_base, yheight=track_height,
                  **self.kwargs)
Example #25
0
def tss_generator():
    """
    Yield `TSS(...)` of every 'mRNA' feature in `db`, using `upstream=1`
    and `downstream=0`.
    """
    # Other feature types (CDS, gene, ...) could be selected here instead.
    messenger_rnas = db.features_of_type('mRNA')
    for tx in messenger_rnas:
        tx_interval = asinterval(tx)
        yield TSS(tx_interval, upstream=1, downstream=0)
Example #26
0
 def generator():
     """
     Yield up to 5000 'gene' features from the database at `dbfn`, each
     converted with `asinterval`.

     Iteration simply ends early when the database holds fewer than 5000
     genes.
     """
     from itertools import islice

     G = gffutils.FeatureDB(dbfn)
     genes = G.features_of_type('gene')
     # islice replaces the Python-2-only `genes.next()` call and ends the
     # generator cleanly on exhaustion instead of leaking StopIteration
     # (a RuntimeError inside generators on Python 3.7+).
     for gene in islice(genes, 5000):
         yield asinterval(gene)
def tss_generator():
    """
    Yield `TSS(...)` of every 'transcript' feature in `db`, using
    `upstream=1` and `downstream=0`.
    """
    transcripts = db.features_of_type('transcript')
    for tx in transcripts:
        tx_interval = asinterval(tx)
        yield TSS(tx_interval, upstream=1, downstream=0)
def generate_bed(gff_feature_item):
    """Yield exactly one item: `gff_feature_item` converted with `asinterval`."""
    converted = asinterval(gff_feature_item)
    yield converted
 def gen():
     """Yield each item of `iterator` converted with `helpers.asinterval`."""
     for item in iterator:
         converted = helpers.asinterval(item)
         yield converted
Example #30
0
 def generator():
     """
     Yield, for each label in `df.index`, the matching `db` entry converted
     with `asinterval`.
     """
     for label in df.index:
         matched = db[label]
         yield asinterval(matched)
Example #31
0
def tss_generator():
    """
    Yield `TSS(...)` of every 'transcript' feature in `db`, using 1000 for
    both `upstream` and `downstream`.
    """
    transcripts = db.features_of_type('transcript')
    for tx in transcripts:
        tx_interval = asinterval(tx)
        yield TSS(tx_interval, upstream=1000, downstream=1000)
def fsdu(which_id):
    """Yield exactly one item: `db[which_id]` converted with `asinterval`."""
    matched = db[which_id]
    yield asinterval(matched)
Example #33
0
 def gen():
     """Yield each item of `iterator` converted with `helpers.asinterval`."""
     for item in iterator:
         converted = helpers.asinterval(item)
         yield converted
def tss_generator(gtf):
    """
    Yield `TSS(...)` of every 'transcript' feature in `db`, using 1000 for
    both `upstream` and `downstream`.

    NOTE(review): `gtf` is accepted but never used here; transcripts are
    read from the module-level `db` -- confirm this is intended.
    """
    transcripts = db.features_of_type('transcript')
    for tx in transcripts:
        tx_interval = asinterval(tx)
        yield TSS(tx_interval, upstream=1000, downstream=1000)