def location_to_feature(db, chrom, start, stop, strand, source, featuretype):
    """
    Build a feature for a genomic interval, annotated from overlapping genes.

    The database is queried for features of type ``'gene'`` in the given
    region, and their attributes are folded together, one gene at a time,
    with ``merge_attributes``.

    Parameters
    ----------
    db : object
        Must provide a ``region(seqid, start, end, strand, featuretype)``
        method yielding objects that have an ``attributes`` member.
    chrom : str
        Sequence name; used both for the region query and the new feature.
    start, stop : int
        Interval coordinates, passed through unchanged.
    strand : str
        Strand of the interval. Any value not found in ``STRANDS`` is
        replaced by ``'.'``.
    source : str
        Source field of the new feature.
    featuretype : str
        Featuretype field of the new feature.

    Returns
    -------
    gffutils.Feature
        Feature whose id is ``exon:<chrom>:<start>-<stop>:<strand>``. Note
        that the id prefix is always ``exon`` regardless of `featuretype`.
    """
    # Fall back to "unstranded" for anything that is not a known strand symbol
    strand = strand if strand in STRANDS else '.'

    genes_in_region = db.region(
        seqid=chrom,
        start=start,
        end=stop,
        strand=strand,
        featuretype='gene',
    )

    feature_id = 'exon:{chrom}:{start}-{stop}:{strand}'.format(
        chrom=chrom, start=start, stop=stop, strand=strand)

    # Accumulate the attributes of every overlapping gene
    merged = {}
    for gene in genes_in_region:
        merged = merge_attributes(merged, gene.attributes)

    return gffutils.Feature(
        chrom,
        source=source,
        featuretype=featuretype,
        start=start,
        end=stop,
        strand=strand,
        id=feature_id,
        attributes=merged,
    )
def test_merge_attributes(self):
    """
    Exercise ``helpers.merge_attributes`` on every key/value pairing.

    The inputs cover: keys present on one side only ("foo", "bar"),
    differing scalars ("baz"), differing lists ("buz"), identical scalars
    ("biz") and identical lists ("boo").
    """
    left = {"foo": [1], "baz": 1, "buz": [1], "biz": 1, "boo": [1]}
    right = {"bar": [2], "baz": 2, "buz": [2], "biz": 1, "boo": [1]}

    merged = helpers.merge_attributes(left, right)

    # Every value is expected to come back as a list, without duplicates
    expected = {
        "foo": [1],
        "bar": [2],
        "baz": [1, 2],
        "boo": [1],
        "buz": [1, 2],
        "biz": [1],
    }
    self.assertDictEqual(merged, expected)
def test_merge_attributes(self):
    """
    Merge two plain dictionaries and compare against the expected result.

    Covered combinations: key only in the first dict ('foo'), key only in
    the second ('bar'), scalar vs. different scalar ("baz"), list vs.
    different list ("buz"), equal scalars ("biz") and equal lists ("boo").
    """
    first = {'foo': [1], "baz": 1, "buz": [1], "biz": 1, "boo": [1]}
    second = {'bar': [2], "baz": 2, "buz": [2], "biz": 1, "boo": [1]}
    expected = {
        'foo': [1],
        'bar': [2],
        "baz": [1, 2],
        "boo": [1],
        "buz": [1, 2],
        "biz": [1],
    }
    self.assertDictEqual(helpers.merge_attributes(first, second), expected)
def test_merge_Attributes(self):
    """
    Merge the attributes of two features parsed from text lines.

    Both features are built with ``feature.feature_from_line`` in
    non-strict mode. Each merged value is sorted before the comparison, so
    the check does not depend on the order of values within a key.
    """
    left = feature.feature_from_line(
        'chr2L . testing 1 10 . + . foo=1; baz=1; buz=1; biz=1; boo=1;',
        strict=False)
    right = feature.feature_from_line(
        'chr2L . testing 1 10 . + . bar=2; baz=2; buz=2; biz=1; boo=1;',
        strict=False)

    merged = helpers.merge_attributes(left.attributes, right.attributes)

    # Normalise value order in place (keeps the merged object's own type)
    for key, values in list(merged.items()):
        merged[key] = sorted(values)

    expected = {
        'foo': ['1'],
        'bar': ['2'],
        "baz": ['1', '2'],
        "boo": ['1'],
        "buz": ['1', '2'],
        "biz": ['1'],
    }
    self.assertDictEqual(merged, expected)
def test_merge_attributes(self):
    """
    Merge two plain dictionaries, ignoring the order of values per key.

    Covered combinations: key only in the first dict ('foo'), key only in
    the second ('bar'), scalar vs. different scalar ("baz"), list vs.
    different list ("buz"), equal scalars ("biz") and equal lists ("boo").
    """
    first = {'foo': [1], "baz": 1, "buz": [1], "biz": 1, "boo": [1]}
    second = {'bar': [2], "baz": 2, "buz": [2], "biz": 1, "boo": [1]}

    merged = helpers.merge_attributes(first, second)

    # Sort each value list in place so the comparison is order-insensitive
    for key, values in list(merged.items()):
        merged[key] = sorted(values)

    expected = {
        'foo': [1],
        'bar': [2],
        "baz": [1, 2],
        "boo": [1],
        "buz": [1, 2],
        "biz": [1],
    }
    self.assertDictEqual(merged, expected)
def exon_location_to_feature(self, chrom, start, stop, strand):
    """
    Build a novel-exon feature for an interval, annotated from its genes.

    ``self.db`` is queried for features of type ``'gene'`` in the region and
    their attributes are folded together with ``merge_attributes``. The new
    feature uses ``OUTRIGGER_DE_NOVO`` as source and ``NOVEL_EXON`` as
    featuretype.

    Parameters
    ----------
    chrom : str
        Sequence name; used for the region query and the new feature.
    start, stop : int
        Interval coordinates, passed through unchanged.
    strand : str
        Strand of the interval. Any value not found in ``STRANDS`` is
        replaced by ``'.'``.

    Returns
    -------
    gffutils.Feature
        Feature with id ``exon:<chrom>:<start>-<stop>:<strand>``.
    """
    # Fall back to "unstranded" for anything that is not a known strand symbol
    strand = strand if strand in STRANDS else '.'

    genes_in_region = self.db.region(
        seqid=chrom,
        start=start,
        end=stop,
        strand=strand,
        featuretype='gene',
    )

    feature_id = 'exon:{chrom}:{start}-{stop}:{strand}'.format(
        chrom=chrom, start=start, stop=stop, strand=strand)

    # Accumulate the attributes of every overlapping gene
    merged = {}
    for gene in genes_in_region:
        merged = merge_attributes(merged, gene.attributes)

    return gffutils.Feature(
        chrom,
        source=OUTRIGGER_DE_NOVO,
        featuretype=NOVEL_EXON,
        start=start,
        end=stop,
        strand=strand,
        id=feature_id,
        attributes=merged,
    )
def test_merge_Attributes(self):
    """
    Check attribute merging for two features parsed from text lines.

    The features are created by ``feature.feature_from_line`` with
    ``strict=False``; merged values are sorted per key before comparing, so
    only the set of values per key matters.
    """
    gff_lines = (
        'chr2L . testing 1 10 . + . foo=1; baz=1; buz=1; biz=1; boo=1;',
        'chr2L . testing 1 10 . + . bar=2; baz=2; buz=2; biz=1; boo=1;',
    )
    f1 = feature.feature_from_line(gff_lines[0], strict=False)
    f2 = feature.feature_from_line(gff_lines[1], strict=False)

    result = helpers.merge_attributes(f1.attributes, f2.attributes)

    # Normalise value order in place (keeps the merged object's own type)
    for name, values in list(result.items()):
        result[name] = sorted(values)

    expected = {
        'foo': ['1'],
        'bar': ['2'],
        "baz": ['1', '2'],
        "boo": ['1'],
        "buz": ['1', '2'],
        "biz": ['1'],
    }
    self.assertDictEqual(result, expected)
def interfeatures(self, features, new_featuretype=None,
                  merge_attributes=True, dialect=None,
                  attribute_func=None, update_attributes=None):
    """
    Construct new features representing the space between features.

    For example, if `features` is a list of exons, then this method will
    return the introns.  If `features` is a list of genes, then this method
    will return the intergenic regions.

    Providing N features on a single chromosome will return N - 1 new
    features.  No inter-feature is produced across a chromosome boundary.

    This method purposefully does *not* do any merging or sorting of
    coordinates, so you may want to use :meth:`FeatureDB.merge` first, or
    when selecting features use the `order_by` kwarg, e.g.,
    `db.features_of_type('gene', order_by=('seqid', 'start'))`.

    Parameters
    ----------
    features : iterable of :class:`feature.Feature` instances
        Sorted, merged iterable

    new_featuretype : string or None
        The new features will all be of this type, or, if None (default)
        then the featuretype of each new feature is constructed from its
        two neighboring features, e.g., `inter_exon_exon`.

    merge_attributes : bool
        If True, new features' attributes will be a merge of the two
        neighboring features' attributes (via `helpers.merge_attributes`).
        This is useful if you have provided a list of exons; the introns
        will then retain the transcript and/or gene parents as a single
        item.  If False, new features start with empty attributes.

    dialect : dict or None
        If None, `self.dialect` is looked up when available.
        NOTE(review): the resolved value is not forwarded to
        `self._feature_returner` by this method.

    attribute_func : callable or None
        NOTE(review): accepted for API compatibility but not applied by
        this method.

    update_attributes : dict
        After attributes have been merged, this dictionary is applied with
        `dict.update` to add or replace entries of the new feature's
        attributes.

    Returns
    -------
    A generator that yields :class:`Feature` objects
    """
    last_feature = None
    interfeature_start = None

    for f in features:
        # There is no inter-feature before the first feature, nor across a
        # chromosome boundary.  For example, when getting intergenic
        # regions from all genes, they will be on different chromosomes.
        # We still assume sorted features, so just restart the bookkeeping
        # (including the start coordinate) from this feature and move on.
        if last_feature is None or last_feature.chrom != f.chrom:
            last_feature = f
            interfeature_start = f.stop
            continue

        # Derive the type per pair without overwriting the argument, so
        # that mixed input featuretypes are named after their own neighbors
        if new_featuretype is None:
            featuretype = 'inter_%s_%s' % (
                last_feature.featuretype, f.featuretype)
        else:
            featuretype = new_featuretype

        # Neighbors on opposite strands give an unstranded inter-feature
        strand = '.' if last_feature.strand != f.strand else f.strand
        chrom = last_feature.chrom

        # Shrink: the gap excludes the flanking features' own end positions
        start = interfeature_start + 1
        stop = f.start - 1

        if merge_attributes:
            new_attributes = helpers.merge_attributes(
                last_feature.attributes, f.attributes)
        else:
            new_attributes = {}

        if update_attributes:
            new_attributes.update(update_attributes)

        new_bin = bins.bins(start, stop, one=True)
        fields = dict(
            seqid=chrom,
            source='gffutils_derived',
            featuretype=featuretype,
            start=start,
            end=stop,
            score='.',
            strand=strand,
            frame='.',
            attributes=new_attributes,
            bin=new_bin)

        if dialect is None:
            # Support for @classmethod -- if calling from the class, then
            # self.dialect is not defined, so defer to Feature's default
            # (which will be constants.dialect, or GFF3).
            try:
                dialect = self.dialect
            except AttributeError:
                dialect = None

        yield self._feature_returner(**fields)

        # Advance both pieces of state so the next gap is computed against
        # the feature we just passed, not the first one on the chromosome
        interfeature_start = f.stop
        last_feature = f
def interfeatures(self, features, new_featuretype=None,
                  merge_attributes=True, dialect=None):
    """
    Construct new features representing the space between features.

    For example, if `features` is a list of exons, then this method will
    return the introns.  If `features` is a list of genes, then this method
    will return the intergenic regions.

    Providing N features will return N - 1 new features.

    This method purposefully does *not* do any merging or sorting of
    coordinates, so you may want to use :meth:`FeatureDB.merge` first.

    Parameters
    ----------
    features : iterable of :class:`feature.Feature` instances
        Sorted, merged iterable.  All features must share the same strand
        and chromosome.

    new_featuretype : string or None
        The new features will all be of this type, or, if None (default)
        then the featuretype of each new feature is constructed from its
        two neighboring features, e.g., `inter_exon_exon`.

    merge_attributes : bool
        If True (default), the new features' attributes will be a merge of
        the two neighboring features' attributes (via
        `helpers.merge_attributes`).  This is useful if you have provided a
        list of exons; the introns will then retain the transcript and/or
        gene parents.  If False, new features get empty attributes.

    dialect : dict or None
        If None, `self.dialect` is looked up when available.
        NOTE(review): the resolved value is not forwarded to
        `self._feature_returner` by this method.

    Returns
    -------
    A generator that yields :class:`Feature` objects

    Raises
    ------
    AssertionError
        If two consecutive features differ in strand or chromosome.
    """
    last_feature = None
    interfeature_start = None

    for f in features:
        # no inter-feature for the first one
        if last_feature is None:
            last_feature = f
            interfeature_start = f.stop
            continue

        # Kept as assertions so callers still see AssertionError
        assert last_feature.strand == f.strand, (
            'features must share a strand: %r != %r'
            % (last_feature.strand, f.strand))
        assert last_feature.chrom == f.chrom, (
            'features must share a chromosome: %r != %r'
            % (last_feature.chrom, f.chrom))

        # Derive the type per pair without overwriting the argument, so
        # that mixed input featuretypes are named after their own neighbors
        if new_featuretype is None:
            featuretype = 'inter_%s_%s' % (
                last_feature.featuretype, f.featuretype)
        else:
            featuretype = new_featuretype

        strand = last_feature.strand
        chrom = last_feature.chrom

        # Shrink: the gap excludes the flanking features' own end positions
        start = interfeature_start + 1
        stop = f.start - 1

        if merge_attributes:
            new_attributes = helpers.merge_attributes(
                last_feature.attributes, f.attributes)
        else:
            new_attributes = {}

        new_bin = bins.bins(start, stop, one=True)
        fields = dict(
            seqid=chrom,
            source='gffutils_derived',
            featuretype=featuretype,
            start=start,
            end=stop,
            score='.',
            strand=strand,
            frame='.',
            attributes=new_attributes,
            bin=new_bin)

        if dialect is None:
            # Support for @classmethod -- if calling from the class, then
            # self.dialect is not defined, so defer to Feature's default
            # (which will be constants.dialect, or GFF3).
            try:
                dialect = self.dialect
            except AttributeError:
                dialect = None

        yield self._feature_returner(**fields)

        # Advance both pieces of state so the next gap is computed against
        # the feature we just passed, not the very first one
        interfeature_start = f.stop
        last_feature = f