Ejemplo n.º 1
0
def location_to_feature(db, chrom, start, stop, strand, source, featuretype):
    """Build a ``gffutils.Feature`` for the interval ``chrom:start-stop``.

    The feature's attributes are accumulated by folding ``merge_attributes``
    over every ``'gene'`` feature that ``db.region`` returns for the same
    interval and strand.

    Parameters
    ----------
    db : object
        Must expose ``region(seqid=, start=, end=, strand=, featuretype=)``
        returning an iterable of objects with an ``attributes`` member.
    chrom, start, stop : interval coordinates, passed through unchanged
    strand : str
        Any value not found in ``STRANDS`` is replaced with ``'.'``.
    source, featuretype : passed through to the new feature

    Returns
    -------
    gffutils.Feature
        Its id has the form ``exon:<chrom>:<start>-<stop>:<strand>``.
    """
    # Normalize unrecognized strand values before they are used anywhere.
    strand = strand if strand in STRANDS else '.'

    feature_id = 'exon:{chrom}:{start}-{stop}:{strand}'.format(
        chrom=chrom, start=start, stop=stop, strand=strand)

    gene_hits = db.region(seqid=chrom, start=start, end=stop,
                          strand=strand, featuretype='gene')

    # Fold the attributes of every gene hit into one dictionary.
    merged = {}
    for gene in gene_hits:
        merged = merge_attributes(merged, gene.attributes)

    return gffutils.Feature(chrom, source=source, featuretype=featuretype,
                            start=start, end=stop, strand=strand,
                            id=feature_id, attributes=merged)
Ejemplo n.º 2
0
    def test_merge_attributes(self):
        """
        Merge two plain dictionaries and compare against the expected result.

        The inputs mix keys present on one side only, keys whose values
        differ, and keys whose values match, using both scalar and list
        values.
        """
        left = {"foo": [1], "baz": 1, "buz": [1], "biz": 1, "boo": [1]}
        right = {"bar": [2], "baz": 2, "buz": [2], "biz": 1, "boo": [1]}

        expected = {
            "foo": [1],
            "bar": [2],
            "baz": [1, 2],
            "boo": [1],
            "buz": [1, 2],
            "biz": [1],
        }

        merged = helpers.merge_attributes(left, right)
        self.assertDictEqual(merged, expected)
Ejemplo n.º 3
0
 def test_merge_attributes(self):
     """
     Merge two plain dictionaries and compare against the expected result.

     The inputs mix keys present on one side only, keys whose values
     differ, and keys whose values match, using both scalar and list
     values.
     """
     first = {"foo": [1], "baz": 1, "buz": [1], "biz": 1, "boo": [1]}
     second = {"bar": [2], "baz": 2, "buz": [2], "biz": 1, "boo": [1]}

     expected = {"foo": [1], "bar": [2], "baz": [1, 2],
                 "boo": [1], "buz": [1, 2], "biz": [1]}

     merged = helpers.merge_attributes(first, second)
     self.assertDictEqual(merged, expected)
Ejemplo n.º 4
0
    def test_merge_Attributes(self):
        """
        Merge the attributes of two parsed features and check the result.

        Each merged value is sorted before the comparison, so the assertion
        does not depend on the order in which values were combined.
        """
        first_line = 'chr2L . testing 1 10 . + . foo=1; baz=1; buz=1; biz=1; boo=1;'
        second_line = 'chr2L . testing 1 10 . + . bar=2; baz=2; buz=2; biz=1; boo=1;'

        first = feature.feature_from_line(first_line, strict=False)
        second = feature.feature_from_line(second_line, strict=False)
        merged = helpers.merge_attributes(first.attributes, second.attributes)

        # Iterate over a snapshot because values are reassigned in place.
        for key, values in tuple(merged.items()):
            merged[key] = sorted(values)

        expected = {
            'foo': ['1'],
            'bar': ['2'],
            'baz': ['1', '2'],
            'boo': ['1'],
            'buz': ['1', '2'],
            'biz': ['1'],
        }
        self.assertDictEqual(merged, expected)
Ejemplo n.º 5
0
    def test_merge_attributes(self):
        """
        Merge two plain dictionaries and compare against the expected result.

        Each merged value is sorted before the comparison, so the assertion
        does not depend on the order in which values were combined.
        """
        left = {"foo": [1], "baz": 1, "buz": [1], "biz": 1, "boo": [1]}
        right = {"bar": [2], "baz": 2, "buz": [2], "biz": 1, "boo": [1]}
        expected = {
            "foo": [1],
            "bar": [2],
            "baz": [1, 2],
            "boo": [1],
            "buz": [1, 2],
            "biz": [1],
        }

        merged = helpers.merge_attributes(left, right)

        # Iterate over a snapshot because values are reassigned in place.
        for key, values in tuple(merged.items()):
            merged[key] = sorted(values)

        self.assertDictEqual(merged, expected)
Ejemplo n.º 6
0
    def exon_location_to_feature(self, chrom, start, stop, strand):
        """Build a ``gffutils.Feature`` for the interval ``chrom:start-stop``.

        The feature uses ``OUTRIGGER_DE_NOVO`` as its source and
        ``NOVEL_EXON`` as its featuretype. Its attributes are accumulated by
        folding ``merge_attributes`` over every ``'gene'`` feature that
        ``self.db.region`` returns for the same interval and strand.

        A ``strand`` value not found in ``STRANDS`` is replaced with ``'.'``.
        The returned feature's id has the form
        ``exon:<chrom>:<start>-<stop>:<strand>``.
        """
        # Normalize unrecognized strand values before they are used anywhere.
        strand = strand if strand in STRANDS else '.'

        feature_id = 'exon:{chrom}:{start}-{stop}:{strand}'.format(
            chrom=chrom, start=start, stop=stop, strand=strand)

        gene_hits = self.db.region(
            seqid=chrom, start=start, end=stop, strand=strand,
            featuretype='gene')

        # Fold the attributes of every gene hit into one dictionary.
        merged = {}
        for gene in gene_hits:
            merged = merge_attributes(merged, gene.attributes)

        return gffutils.Feature(
            chrom, source=OUTRIGGER_DE_NOVO, featuretype=NOVEL_EXON,
            start=start, end=stop, strand=strand, id=feature_id,
            attributes=merged)
Ejemplo n.º 7
0
    def test_merge_Attributes(self):
        """
        Merge the attributes of two parsed features and check the result.

        Each merged value is sorted before the comparison, so the assertion
        does not depend on the order in which values were combined.
        """
        left, right = (
            feature.feature_from_line(
                'chr2L . testing 1 10 . + . foo=1; baz=1; buz=1; biz=1; boo=1;',
                strict=False),
            feature.feature_from_line(
                'chr2L . testing 1 10 . + . bar=2; baz=2; buz=2; biz=1; boo=1;',
                strict=False),
        )
        merged = helpers.merge_attributes(left.attributes, right.attributes)

        # Iterate over a snapshot because values are reassigned in place.
        for key, values in tuple(merged.items()):
            merged[key] = sorted(values)

        expected = {'foo': ['1'], 'bar': ['2'], 'baz': ['1', '2'],
                    'boo': ['1'], 'buz': ['1', '2'], 'biz': ['1']}
        self.assertDictEqual(merged, expected)
Ejemplo n.º 8
0
    def interfeatures(self, features, new_featuretype=None,
                      merge_attributes=True, dialect=None,
                      attribute_func=None, update_attributes=None):
        """
        Construct new features representing the space between features.

        For example, if `features` is a list of exons, then this method will
        return the introns.  If `features` is a list of genes, then this method
        will return the intergenic regions.

        Providing N features on a single chromosome will return N - 1 new
        features.  No feature is produced across a chromosome boundary, so
        fewer are returned when consecutive features lie on different
        chromosomes.

        This method purposefully does *not* do any merging or sorting of
        coordinates, so you may want to use :meth:`FeatureDB.merge` first, or
        when selecting features use the `order_by` kwarg, e.g.,
        `db.features_of_type('gene', order_by=('seqid', 'start'))`.  No check
        is made for abutting or overlapping neighbors; for such a pair the
        computed start is greater than the computed end.

        Parameters
        ----------
        features : iterable of :class:`feature.Feature` instances
            Sorted, merged iterable

        new_featuretype : string or None
            The new features will all be of this type, or, if None (default)
            then the featuretype is constructed from the first pair of
            neighboring features, e.g., `inter_exon_exon`, and reused for
            every feature yielded afterwards.

        merge_attributes : bool
            If True, each new feature's attributes are the result of
            `helpers.merge_attributes` applied to the attributes of the two
            features on either side of it.  If False, the new feature starts
            with an empty attribute dictionary.

        dialect : optional
            If None, `self.dialect` is looked up when it exists.
            NOTE(review): the resolved value is not forwarded to
            `self._feature_returner` by this method.

        attribute_func : callable or None
            Accepted for interface compatibility; this implementation does
            not call it.

        update_attributes : dict
            After attributes have been merged, this dictionary is applied on
            top with `dict.update`, replacing any matching keys.

        Returns
        -------
        A generator that yields the objects produced by
        `self._feature_returner`, one per pair of neighboring features on the
        same chromosome.
        """
        last_feature = None
        for f in features:
            # The first feature only seeds the comparison; there is nothing
            # to its left to form an inter-feature with.
            if last_feature is None:
                last_feature = f
                continue

            if new_featuretype is None:
                new_featuretype = 'inter_%s_%s' % (
                    last_feature.featuretype, f.featuretype)

            if last_feature.chrom != f.chrom:
                # We've moved to a new chromosome.  For example, if we're
                # getting intergenic regions from all genes, they will be on
                # different chromosomes. We still assume sorted features, but
                # don't complain if they're on different chromosomes -- just
                # move on.  Re-anchoring on `f` also makes the next
                # inter-feature start after `f` rather than after the last
                # feature of the previous chromosome.
                last_feature = f
                continue

            # Neighbors on opposite strands give an unstranded inter-feature.
            if last_feature.strand != f.strand:
                strand = '.'
            else:
                strand = f.strand
            chrom = last_feature.chrom

            # Shrink by one on each side so the new feature covers only the
            # positions strictly between the two neighbors.
            interfeature_start = last_feature.stop + 1
            interfeature_stop = f.start - 1

            if merge_attributes:
                new_attributes = helpers.merge_attributes(
                    last_feature.attributes, f.attributes)
            else:
                new_attributes = {}

            if update_attributes:
                new_attributes.update(update_attributes)

            new_bin = bins.bins(
                interfeature_start, interfeature_stop, one=True)
            fields = dict(
                seqid=chrom,
                source='gffutils_derived',
                featuretype=new_featuretype,
                start=interfeature_start,
                end=interfeature_stop,
                score='.',
                strand=strand,
                frame='.',
                attributes=new_attributes,
                bin=new_bin)

            if dialect is None:
                # Support for @classmethod -- if calling from the class, then
                # self.dialect is not defined, so defer to Feature's default
                # (which will be constants.dialect, or GFF3).
                try:
                    dialect = self.dialect
                except AttributeError:
                    dialect = None
            yield self._feature_returner(**fields)

            # Advance the left-hand neighbor so the next inter-feature is
            # built from the true adjacent pair, not from the first feature.
            last_feature = f
Ejemplo n.º 9
0
    def interfeatures(self, features, new_featuretype=None,
                      merge_attributes=True, dialect=None,
                      attribute_func=None, update_attributes=None):
        """
        Construct new features representing the space between features.

        For example, if `features` is a list of exons, then this method will
        return the introns.  If `features` is a list of genes, then this method
        will return the intergenic regions.

        Providing N features on a single chromosome will return N - 1 new
        features.  No feature is produced across a chromosome boundary, so
        fewer are returned when consecutive features lie on different
        chromosomes.

        This method purposefully does *not* do any merging or sorting of
        coordinates, so you may want to use :meth:`FeatureDB.merge` first, or
        when selecting features use the `order_by` kwarg, e.g.,
        `db.features_of_type('gene', order_by=('seqid', 'start'))`.  No check
        is made for abutting or overlapping neighbors; for such a pair the
        computed start is greater than the computed end.

        Parameters
        ----------
        features : iterable of :class:`feature.Feature` instances
            Sorted, merged iterable

        new_featuretype : string or None
            The new features will all be of this type, or, if None (default)
            then the featuretype is constructed from the first pair of
            neighboring features, e.g., `inter_exon_exon`, and reused for
            every feature yielded afterwards.

        merge_attributes : bool
            If True, each new feature's attributes are the result of
            `helpers.merge_attributes` applied to the attributes of the two
            features on either side of it.  If False, the new feature starts
            with an empty attribute dictionary.

        dialect : optional
            If None, `self.dialect` is looked up when it exists.
            NOTE(review): the resolved value is not forwarded to
            `self._feature_returner` by this method.

        attribute_func : callable or None
            Accepted for interface compatibility; this implementation does
            not call it.

        update_attributes : dict
            After attributes have been merged, this dictionary is applied on
            top with `dict.update`, replacing any matching keys.

        Returns
        -------
        A generator that yields the objects produced by
        `self._feature_returner`, one per pair of neighboring features on the
        same chromosome.
        """
        last_feature = None
        for f in features:
            # The first feature only seeds the comparison; there is nothing
            # to its left to form an inter-feature with.
            if last_feature is None:
                last_feature = f
                continue

            if new_featuretype is None:
                new_featuretype = 'inter_%s_%s' % (
                    last_feature.featuretype, f.featuretype)

            if last_feature.chrom != f.chrom:
                # We've moved to a new chromosome.  For example, if we're
                # getting intergenic regions from all genes, they will be on
                # different chromosomes. We still assume sorted features, but
                # don't complain if they're on different chromosomes -- just
                # move on.  Re-anchoring on `f` also makes the next
                # inter-feature start after `f` rather than after the last
                # feature of the previous chromosome.
                last_feature = f
                continue

            # Neighbors on opposite strands give an unstranded inter-feature.
            if last_feature.strand != f.strand:
                strand = '.'
            else:
                strand = f.strand
            chrom = last_feature.chrom

            # Shrink by one on each side so the new feature covers only the
            # positions strictly between the two neighbors.
            interfeature_start = last_feature.stop + 1
            interfeature_stop = f.start - 1

            if merge_attributes:
                new_attributes = helpers.merge_attributes(
                    last_feature.attributes, f.attributes)
            else:
                new_attributes = {}

            if update_attributes:
                new_attributes.update(update_attributes)

            new_bin = bins.bins(
                interfeature_start, interfeature_stop, one=True)
            fields = dict(
                seqid=chrom,
                source='gffutils_derived',
                featuretype=new_featuretype,
                start=interfeature_start,
                end=interfeature_stop,
                score='.',
                strand=strand,
                frame='.',
                attributes=new_attributes,
                bin=new_bin)

            if dialect is None:
                # Support for @classmethod -- if calling from the class, then
                # self.dialect is not defined, so defer to Feature's default
                # (which will be constants.dialect, or GFF3).
                try:
                    dialect = self.dialect
                except AttributeError:
                    dialect = None
            yield self._feature_returner(**fields)

            # Advance the left-hand neighbor so the next inter-feature is
            # built from the true adjacent pair, not from the first feature.
            last_feature = f
Ejemplo n.º 10
0
    def interfeatures(self, features, new_featuretype=None,
                      merge_attributes=True, dialect=None):
        """
        Construct new features representing the space between features.

        For example, if `features` is a list of exons, then this method will
        return the introns.  If `features` is a list of genes, then this method
        will return the intergenic regions.

        Providing N features will return N - 1 new features.

        This method purposefully does *not* do any merging or sorting of
        coordinates, so you may want to use :meth:`FeatureDB.merge` first.
        No check is made for abutting or overlapping neighbors; for such a
        pair the computed start is greater than the computed end.

        All features must share one strand and one chromosome; each
        neighboring pair is checked with `assert`.

        Parameters
        ----------
        features : iterable of :class:`feature.Feature` instances
            Sorted, merged iterable

        new_featuretype : string or None
            The new features will all be of this type, or, if None (default)
            then the featuretype is constructed from the first pair of
            neighboring features, e.g., `inter_exon_exon`, and reused for
            every feature yielded afterwards.

        merge_attributes : bool
            If True (default), each new feature's attributes are the result
            of `helpers.merge_attributes` applied to the attributes of the
            two features on either side of it.  This is useful if you have
            provided a list of exons; the introns will then retain the
            transcript and/or gene parents.  If False, the new feature gets
            an empty attribute dictionary.

        dialect : optional
            If None, `self.dialect` is looked up when it exists.
            NOTE(review): the resolved value is not forwarded to
            `self._feature_returner` by this method.

        Returns
        -------
        A generator that yields the objects produced by
        `self._feature_returner`, one per pair of neighboring features.
        """
        last_feature = None
        for f in features:
            # The first feature only seeds the comparison; there is nothing
            # to its left to form an inter-feature with.
            if last_feature is None:
                last_feature = f
                continue

            if new_featuretype is None:
                new_featuretype = 'inter_%s_%s' % (
                    last_feature.featuretype, f.featuretype)
            assert last_feature.strand == f.strand
            assert last_feature.chrom == f.chrom
            strand = last_feature.strand
            chrom = last_feature.chrom

            # Shrink by one on each side so the new feature covers only the
            # positions strictly between the two neighbors.
            interfeature_start = last_feature.stop + 1
            interfeature_stop = f.start - 1

            if merge_attributes:
                new_attributes = helpers.merge_attributes(
                    last_feature.attributes, f.attributes)
            else:
                new_attributes = {}

            new_bin = bins.bins(
                interfeature_start, interfeature_stop, one=True)
            fields = dict(
                seqid=chrom,
                source='gffutils_derived',
                featuretype=new_featuretype,
                start=interfeature_start,
                end=interfeature_stop,
                score='.',
                strand=strand,
                frame='.',
                attributes=new_attributes,
                bin=new_bin)

            if dialect is None:
                # Support for @classmethod -- if calling from the class, then
                # self.dialect is not defined, so defer to Feature's default
                # (which will be constants.dialect, or GFF3).
                try:
                    dialect = self.dialect
                except AttributeError:
                    dialect = None
            yield self._feature_returner(**fields)

            # Advance the left-hand neighbor so the next inter-feature is
            # built from the true adjacent pair, not from the first feature.
            last_feature = f