def update(self, data, make_backup=True, **kwargs):
    """
    Update database with features in `data`.

    Parameters
    ----------
    data : str, iterable, FeatureDB instance
        If FeatureDB, all data will be used. If string, assume it's a
        filename of a GFF or GTF file.  Otherwise, assume it's an iterable
        of Feature objects.  The classes in gffutils.iterators may be
        helpful in this case.

    make_backup : bool
        If True, and the database you're about to update is a file on disk,
        makes a copy of the existing database and saves it with a .bak
        extension.

    Notes
    -----
    Other kwargs are used in the same way as in gffutils.create_db; see the
    help for that function for details.

    Returns
    -------
    The creator object (`create._GTFDBCreator` or `create._GFFDBCreator`,
    chosen from ``self.dialect['fmt']``) after it has populated, re-linked
    and finalized the database.
    NOTE(review): earlier documentation described this as a FeatureDB --
    confirm what the creator classes actually are.

    Raises
    ------
    ValueError
        If ``self.dialect['fmt']`` is neither ``'gtf'`` nor ``'gff3'``.
    """
    from gffutils import create
    from gffutils import iterators

    # Only back up when dbfn is a string; presumably that means a path on
    # disk rather than an already-open connection -- TODO confirm.
    if make_backup and isinstance(self.dbfn, six.string_types):
        shutil.copy2(self.dbfn, self.dbfn + '.bak')

    # The iterator only receives the kwargs it is known to accept; the
    # creator below still receives the complete, unfiltered kwargs.
    _iterator_kwargs = {
        k: v for k, v in kwargs.items()
        if k in constants._iterator_kwargs
    }

    # Handle all sorts of input
    data = iterators.DataIterator(data, **_iterator_kwargs)

    fmt = self.dialect['fmt']
    if fmt == 'gtf':
        # Default ID scheme for GTF unless the caller supplied one.
        kwargs.setdefault(
            'id_spec', {'gene': 'gene_id', 'transcript': 'transcript_id'})
        db = create._GTFDBCreator(
            data=data, dbfn=self.dbfn, dialect=self.dialect, **kwargs)
    elif fmt == 'gff3':
        # Default ID scheme for GFF3 unless the caller supplied one.
        kwargs.setdefault('id_spec', 'ID')
        db = create._GFFDBCreator(
            data=data, dbfn=self.dbfn, dialect=self.dialect, **kwargs)
    else:
        # Same exception type as before, but now says what was rejected.
        raise ValueError(
            "Unsupported dialect format %r; expected 'gtf' or 'gff3'"
            % (fmt,))

    db._populate_from_lines(data)
    db._update_relations()
    db._finalize()
    return db
def update(self, features, make_backup=True, **kwargs):
    """
    Update database with features.

    Parameters
    ----------
    features : str, iterable, FeatureDB instance
        If FeatureDB, all features will be used. If string, assume it's a
        filename of a GFF or GTF file.  Otherwise, assume it's an iterable
        of Feature objects.  The classes in gffutils.iterators may be
        helpful in this case.

    make_backup : bool
        If True, and the database you're about to update is a file on disk,
        makes a copy of the existing database and saves it with a .bak
        extension.

    Notes
    -----
    Remaining kwargs are passed to create_db.

    Returns
    -------
    The creator object (`create._GTFDBCreator` or `create._GFFDBCreator`,
    chosen from ``self.dialect['fmt']``) after it has populated, re-linked
    and finalized the database.

    Raises
    ------
    ValueError
        If ``self.dialect['fmt']`` is neither ``'gtf'`` nor ``'gff3'``.
    """
    import os
    import tempfile

    from gffutils import create

    # Only back up when dbfn is a string; presumably that means a path on
    # disk rather than an already-open connection -- TODO confirm.
    if make_backup and isinstance(self.dbfn, six.string_types):
        shutil.copy2(self.dbfn, self.dbfn + '.bak')

    # Scratch directory for the intermediate database; only created when
    # `features` is a filename.  Removed in the `finally` below.
    scratch_dir = None
    try:
        # No matter what `features` came in as, convert to gffutils.Feature
        # instances.  Since the tricky part -- attribute strings -- have
        # been parsed into dicts in a Feature, we no longer have to worry
        # about that.  This also allows GTF features to be used to update a
        # GFF database, or vice versa.
        if isinstance(features, six.string_types):
            # The name `intermediate` used to be referenced without ever
            # being defined (NameError).  Build the intermediate database
            # at a fresh path inside a private temp directory.
            # NOTE(review): assumes create_db's second positional argument
            # is the database filename -- confirm against create.create_db.
            scratch_dir = tempfile.mkdtemp()
            intermediate = os.path.join(scratch_dir, 'intermediate.db')
            indb = create.create_db(features, intermediate, **kwargs)
            features = indb.all_features()

        if isinstance(features, FeatureDB):
            features = features.all_features()

        fmt = self.dialect['fmt']
        if fmt == 'gtf':
            # Default ID scheme for GTF unless the caller supplied one.
            kwargs.setdefault(
                'id_spec',
                {'gene': 'gene_id', 'transcript': 'transcript_id'})
            db = create._GTFDBCreator(
                data=features, dbfn=self.dbfn, dialect=self.dialect,
                **kwargs)
        elif fmt == 'gff3':
            # Default ID scheme for GFF3 unless the caller supplied one.
            kwargs.setdefault('id_spec', 'ID')
            db = create._GFFDBCreator(
                data=features, dbfn=self.dbfn, dialect=self.dialect,
                **kwargs)
        else:
            # Same exception type as before, but now says what was rejected.
            raise ValueError(
                "Unsupported dialect format %r; expected 'gtf' or 'gff3'"
                % (fmt,))

        # `features` may still be reading from the intermediate database
        # here, so the scratch directory must outlive these calls.
        db._populate_from_lines(features)
        db._update_relations()
        db._finalize()
    finally:
        if scratch_dir is not None:
            # Best-effort cleanup; never mask an error from the update.
            shutil.rmtree(scratch_dir, ignore_errors=True)

    return db